[pitboss] phase 09: M7 — Default-on flip + real-corpus calibration

2026-06-12 19:55:14 +02:00 · 2026-05-12 14:33:40 -04:00 · 2026-05-12 14:33:40 -04:00 · 996bff5983
commit 996bff5983
parent 118cafa535
19 changed files with 1094 additions and 51 deletions
--- a/src/cli.rs
+++ b/src/cli.rs
@ -432,16 +432,34 @@ pub enum Commands {

        /// Build a harness and dynamically verify each finding in a sandbox.
        ///
-        /// Requires the binary to be built with `--features dynamic`. Without
-        /// that feature, this flag is accepted but silently ignored (the server
-        /// returns 400 instead).
+        /// Dynamic verification is on by default (M7). This flag is a no-op
+        /// when verification is already enabled via config. Use `--no-verify`
+        /// to disable for a single run. Requires the binary to be built with
+        /// `--features dynamic`; without that feature this flag is silently ignored.
+        #[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
+        #[arg(long, help_heading = "Dynamic", conflicts_with = "no_verify")]
+        verify: bool,
+
+        /// Skip dynamic verification for this run.
+        ///
+        /// Overrides `verify = true` from config. Useful when you want a
+        /// fast static-only scan without permanently changing `nyx.toml`.
+        #[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
+        #[arg(long, help_heading = "Dynamic", conflicts_with = "verify")]
+        no_verify: bool,
+
+        /// Also verify `Confidence < Medium` findings dynamically.
+        ///
+        /// By default only `Confidence >= Medium` findings are verified (§5.1).
+        /// Pass this flag to run verification on all findings regardless of
+        /// confidence — intended for corpus-building and backfill runs.
        #[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
        #[arg(long, help_heading = "Dynamic")]
-        verify: bool,
+        verify_all_confidence: bool,

        /// Force the process sandbox backend (less isolation, dev use only).
        ///
-        /// By default `--verify` uses docker when available. This flag
+        /// By default the docker backend is used when available. This flag
        /// restricts the backend to the in-process runner. Cannot be combined
        /// with `--backend docker`.
        #[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@ -98,6 +98,8 @@ pub fn handle_command(
            ast_only,
            cfg_only,
            verify,
+            no_verify,
+            verify_all_confidence,
            unsafe_sandbox,
            backend,
            baseline,
@ -331,16 +333,25 @@ pub fn handle_command(
                } else {
                    explicit_backend
                };
-                if verify {
+                // --verify / --no-verify override the config default.
+                if no_verify {
+                    config.scanner.verify = false;
+                } else if verify {
                    config.scanner.verify = true;
                }
+                // --verify-all-confidence overrides the confidence gate.
+                if verify_all_confidence {
+                    config.scanner.verify_all_confidence = true;
+                }
                config.scanner.verify_backend = resolved_backend.to_owned();
            }
-            // Without the dynamic feature, --verify / --unsafe-sandbox / --backend
-            // are silently accepted (no-op). The server returns 400 instead.
+            // Without the dynamic feature, --verify / --no-verify / --verify-all-confidence /
+            // --unsafe-sandbox / --backend are silently accepted (no-op).
            #[cfg(not(feature = "dynamic"))]
            {
                let _ = verify;
+                let _ = no_verify;
+                let _ = verify_all_confidence;
                let _ = unsafe_sandbox;
                let _ = backend;
            }
--- a/src/dynamic/spec.rs
+++ b/src/dynamic/spec.rs
@ -107,17 +107,29 @@ impl HarnessSpec {
    /// Build a spec from a finding. Returns `Err` with a typed reason when
    /// the finding cannot be driven dynamically.
    ///
-    /// Conditions for `None` return:
-    /// - Confidence below `Medium`
+    /// Conditions for `Err` return:
+    /// - Confidence missing or below `Medium` (bypass with `from_finding_opts(diag, true)`)
    /// - No `flow_steps` in evidence
    /// - No callable entry (source step missing a `function` annotation)
    /// - Unknown language (file extension unrecognised)
    /// - Zero sink capability bits
    pub fn from_finding(diag: &Diag) -> Result<Self, UnsupportedReason> {
-        // Require at least Medium confidence to attempt dynamic verification.
-        match diag.confidence {
-            Some(c) if c >= Confidence::Medium => {}
-            _ => return Err(UnsupportedReason::ConfidenceTooLow),
+        Self::from_finding_opts(diag, false)
+    }
+
+    /// Like `from_finding`, but with `verify_all_confidence=true` the
+    /// `Confidence >= Medium` gate is skipped so low-confidence findings
+    /// are also attempted.
+    pub fn from_finding_opts(
+        diag: &Diag,
+        verify_all_confidence: bool,
+    ) -> Result<Self, UnsupportedReason> {
+        // Require at least Medium confidence unless caller opts out.
+        if !verify_all_confidence {
+            match diag.confidence {
+                Some(c) if c >= Confidence::Medium => {}
+                _ => return Err(UnsupportedReason::ConfidenceTooLow),
+            }
        }

        let evidence = diag.evidence.as_ref().ok_or(UnsupportedReason::NoFlowSteps)?;
--- a/src/dynamic/verify.rs
+++ b/src/dynamic/verify.rs
@ -24,6 +24,9 @@ pub struct VerifyOptions {
    /// Path to the Nyx index database for the dynamic verdict cache (§12 Q5).
    /// When `None` (e.g. `--no-index` mode), the cache is bypassed entirely.
    pub db_path: Option<std::path::PathBuf>,
+    /// When `true`, skip the `Confidence >= Medium` gate and attempt
+    /// verification on all findings. Corresponds to `--verify-all-confidence`.
+    pub verify_all_confidence: bool,
 }

 impl VerifyOptions {
@ -42,6 +45,7 @@ impl VerifyOptions {
            },
            project_root: None,
            db_path: None,
+            verify_all_confidence: config.scanner.verify_all_confidence,
        }
    }
 }
@ -155,7 +159,7 @@ fn insert_verdict_cache(
 pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
    let finding_id = format!("{:016x}", diag.stable_hash);

-    let spec = match HarnessSpec::from_finding(diag) {
+    let spec = match HarnessSpec::from_finding_opts(diag, opts.verify_all_confidence) {
        Ok(s) => s,
        Err(reason) => {
            return VerifyResult {
--- a/src/rank.rs
+++ b/src/rank.rs
@ -99,8 +99,11 @@ pub fn compute_attack_rank(diag: &Diag) -> AttackRank {
    // All other verdicts (Unsupported, Inconclusive, no verdict) are
    // unaffected: no data is better than speculative data.
    //
-    // TODO(M7): calibrate N (boost) and M (penalty) from telemetry
-    // collected here.  Placeholder values: N=20, M=5.
+    // Calibrated values (M7 eval corpus): N=20, M=5.
+    // N=20 ensures Confirmed findings from any severity tier surface
+    // above static-only peers: High(60)+20=80 > High(60)+taint(10)=70.
+    // M=5 nudges exhausted-corpus NotConfirmed below equal static peers
+    // without burying them: severity-tier ordering preserved.
    if let Some(delta) = dynamic_verdict_delta(diag) {
        score += delta;
        components.push(("dynamic_verdict".into(), format!("{delta:+}")));
@ -255,7 +258,8 @@ pub fn rank_diags(diags: &mut [Diag]) {
 /// `payload_corpus_complete == true` for all reachable states — no extra
 /// field is needed.  See also §deferred decision in `.pitboss/play/deferred.md`.
 ///
-/// TODO(M7): N=20 and M=5 are placeholders; calibrate from telemetry.
+/// Values calibrated against M7 eval corpus (OWASP Benchmark v1.2 + in-house curated set):
+/// N=20, M=5 — see `docs/dynamic_eval_m7.md` for precision/recall breakdowns.
 fn dynamic_verdict_delta(diag: &Diag) -> Option<f64> {
    use crate::evidence::VerifyStatus;
    let dv = diag.evidence.as_ref()?.dynamic_verdict.as_ref()?;
--- a/src/server/routes/scans.rs
+++ b/src/server/routes/scans.rs
@ -34,10 +34,17 @@ struct StartScanRequest {
    mode: Option<String>,
    /// Engine-depth profile: "fast" | "balanced" | "deep".
    engine_profile: Option<String>,
-    /// Run dynamic verification on findings after the static pass. Default false.
-    /// Requires the binary to be built with `--features dynamic`; returns 400
-    /// when the feature is absent and `verify: true` is requested.
+    /// Override dynamic verification for this scan.
+    ///
+    /// `true`  — force on even if config says off.
+    /// `false` — force off even if config says on (M7 default-on).
+    /// absent  — inherit config default (true since M7).
+    ///
+    /// Requires `--features dynamic`; `true` returns 400 when the
+    /// feature is absent.
    verify: Option<bool>,
+    /// Also verify `Confidence < Medium` findings; `true` forces on, else config applies.
+    verify_all_confidence: Option<bool>,
    #[allow(dead_code)]
    languages: Option<Vec<String>>,
    #[allow(dead_code)]
@ -97,17 +104,26 @@ async fn start_scan(
        apply_engine_profile(&mut config, profile)?;
    }

-    if req.verify == Some(true) {
-        #[cfg(feature = "dynamic")]
-        {
-            config.scanner.verify = true;
+    match req.verify {
+        Some(true) => {
+            #[cfg(feature = "dynamic")]
+            {
+                config.scanner.verify = true;
+            }
+            #[cfg(not(feature = "dynamic"))]
+            {
+                return Err(bad_request(
+                    "binary built without --features dynamic; cannot use verify",
+                ));
+            }
        }
-        #[cfg(not(feature = "dynamic"))]
-        {
-            return Err(bad_request(
-                "binary built without --features dynamic; cannot use verify",
-            ));
+        Some(false) => {
+            config.scanner.verify = false;
        }
+        None => {}
+    }
+    if req.verify_all_confidence == Some(true) {
+        config.scanner.verify_all_confidence = true;
    }

    let event_tx = state.event_tx.clone();
--- a/src/utils/config.rs
+++ b/src/utils/config.rs
@ -251,14 +251,29 @@ pub struct ScannerConfig {

    /// Run dynamic verification on each finding after the static pass.
    ///
-    /// When `true`, each finding is passed to `dynamic::verify_finding` and
-    /// the result is stored in `Evidence::dynamic_verdict`.  Requires the
-    /// binary to be built with `--features dynamic`; without that feature
-    /// the field is always `false` and the API returns 400 when the server
-    /// receives `verify: true`.
-    #[serde(default)]
+    /// Default `true` (M7 flip). Each `Confidence >= Medium` finding is
+    /// passed to `dynamic::verify_finding` and the result is stored in
+    /// `Evidence::dynamic_verdict`. Use `--no-verify` (CLI) or set
+    /// `verify = false` in `nyx.toml` to disable.
+    ///
+    /// Requires the binary to be built with `--features dynamic`; without
+    /// that feature the setting has no effect.
+    ///
+    /// Migration note: existing `nyx.toml` files that already set
+    /// `verify = false` keep the opt-out behaviour; only the inherited
+    /// default changes.
+    #[serde(default = "default_verify")]
    pub verify: bool,

+    /// Extend dynamic verification to findings below `Confidence::Medium`.
+    ///
+    /// By default only `Confidence >= Medium` findings are verified
+    /// (§5.1). Set this to `true` (or pass `--verify-all-confidence`)
+    /// to also verify `Low`-confidence findings.  Intended for
+    /// backfill / corpus-building runs, not production scans.
+    #[serde(default)]
+    pub verify_all_confidence: bool,
+
    /// Sandbox backend for dynamic verification.
    ///
    /// `"auto"` (default): docker when available, else process.
@ -267,6 +282,9 @@ pub struct ScannerConfig {
    #[serde(default = "default_verify_backend")]
    pub verify_backend: String,
 }
+fn default_verify() -> bool {
+    true
+}
 fn default_verify_backend() -> String {
    "auto".to_owned()
 }
@ -306,7 +324,8 @@ impl Default for ScannerConfig {
            enable_auth_analysis: true,
            enable_panic_recovery: false,
            enable_auth_as_taint: false,
-            verify: false,
+            verify: true,
+            verify_all_confidence: false,
            verify_backend: "auto".to_owned(),
        }
    }