mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-30 20:39:39 +02:00
Dynamic (#77)
This commit is contained in:
parent
55247b7fcd
commit
991c84a1eb
1464 changed files with 225448 additions and 1985 deletions
686
benches/dynamic_bench.rs
Normal file
686
benches/dynamic_bench.rs
Normal file
|
|
@ -0,0 +1,686 @@
|
|||
//! Dynamic verification benchmarks (§8.4).
|
||||
//!
|
||||
//! Tracks the per-scan cost anchors:
|
||||
//!
|
||||
//! 1. `harness_build_cold` — fresh workdir, spec → BuiltHarness (source gen + disk write).
|
||||
//! 2. `harness_build_warm` — same spec, workdir already staged (file write skipped).
|
||||
//! 3. `sandbox_run_payload` — single payload run via process backend against
|
||||
//! sqli_positive.py (subprocess + settrace overhead, no networking).
|
||||
//! 4. `docker_image_build` — cold image pull/build for the python:3-slim base.
|
||||
//! 5. `docker_exec_warm` — `docker exec` into a running container (no cold start).
|
||||
//! 6. `docker_payload_cost` — per-payload sandbox cost via docker backend end-to-end.
|
||||
//! 7. `composite_chain_reverify_dispatch` — `reverify_top_chains` on a
|
||||
//! synthetic 3-member chain with no member diags. Measures the no-derive
|
||||
//! dispatch path (chain_step_specs miss, early-exit build/run loops,
|
||||
//! Inconclusive verdict allocation, severity downgrade).
|
||||
//! 8. `composite_chain_reverify_stub_confirmed` — same chain shape, stubbed
|
||||
//! reverifier returning `Confirmed`. Measures the apply-verdict happy path
|
||||
//! (no severity bucket change).
|
||||
//! 9. `composite_chain_reverify_top_n_slice` — 5-chain slice with `top_n=3`.
|
||||
//! Measures the slice traversal cost so a regression that walks the full
|
||||
//! slice instead of the prefix is visible.
|
||||
//! 10. `composite_chain_reverify_replay_stable` — same chain shape as
|
||||
//! `stub_confirmed`, but with `VerifyOptions::replay_stable_check=true`
|
||||
//! and a stub that stamps `replay_stable=Some(true)`. Anchors the
|
||||
//! apply-verdict allocation cost when the telemetry stability field
|
||||
//! is populated; a regression that adds per-chain work behind the
|
||||
//! replay opt-in (e.g. an extra run_chain_steps call leaking out of
|
||||
//! the live path into the stub layer) shows up here.
|
||||
//!
|
||||
//! Wall-clock budget anchors for the composite reverify path: the live
|
||||
//! process backend stays under 400ms per 3-member chain, the docker
|
||||
//! backend under 1500ms. Those live-run numbers are covered by the
|
||||
//! `flask_eval_chain_reverify_populates_dynamic_verdict` integration
|
||||
//! test in `tests/chain_emission_e2e.rs`; the microbenches here anchor
|
||||
//! the dispatch + verdict-application overhead so regressions on the
|
||||
//! API-shape half land in the criterion baseline.
|
||||
//!
|
||||
//! Baselines committed to `benches/dynamic_bench_baseline.json`.
|
||||
//! Run: `cargo bench --features dynamic --bench dynamic_bench`
|
||||
//!
|
||||
//! Docker benchmarks are no-ops when docker is unavailable (skipped, not failed).
|
||||
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
use nyx_scanner::dynamic::spec::{
|
||||
EntryKind, HarnessSpec, JavaToolchain, PayloadSlot, SpecDerivationStrategy,
|
||||
};
|
||||
#[cfg(feature = "dynamic")]
|
||||
use nyx_scanner::labels::Cap;
|
||||
#[cfg(feature = "dynamic")]
|
||||
use nyx_scanner::symbol::Lang;
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn make_rust_sqli_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "bench_rust_0001".into(),
|
||||
entry_file: "tests/dynamic_fixtures/rust/sqli_positive.rs".into(),
|
||||
entry_name: "run".into(),
|
||||
entry_kind: nyx_scanner::dynamic::spec::EntryKind::Function,
|
||||
lang: Lang::Rust,
|
||||
toolchain_id: "rust-stable".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "tests/dynamic_fixtures/rust/sqli_positive.rs".into(),
|
||||
sink_line: 18,
|
||||
spec_hash: "benchrustsqli0001".into(),
|
||||
derivation: SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
java_toolchain: JavaToolchain::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn make_sqli_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "bench0000000001".into(),
|
||||
entry_file: "tests/dynamic_fixtures/python/sqli_positive.py".into(),
|
||||
entry_name: "login".into(),
|
||||
entry_kind: EntryKind::Function,
|
||||
lang: Lang::Python,
|
||||
toolchain_id: "python-3".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "tests/dynamic_fixtures/python/sqli_positive.py".into(),
|
||||
sink_line: 7,
|
||||
spec_hash: "benchsqli000001".into(),
|
||||
derivation: SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
java_toolchain: JavaToolchain::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn bench_harness_build_cold(c: &mut Criterion) {
|
||||
use nyx_scanner::dynamic::harness;
|
||||
let spec = make_sqli_spec();
|
||||
c.bench_function("harness_build_cold", |b| {
|
||||
b.iter(|| {
|
||||
let workdir = std::env::temp_dir()
|
||||
.join("nyx-harness")
|
||||
.join(&spec.spec_hash);
|
||||
let _ = std::fs::remove_dir_all(&workdir);
|
||||
harness::build(&spec).expect("harness build")
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn bench_harness_build_warm(c: &mut Criterion) {
|
||||
use nyx_scanner::dynamic::harness;
|
||||
let spec = make_sqli_spec();
|
||||
harness::build(&spec).expect("harness pre-stage");
|
||||
c.bench_function("harness_build_warm", |b| {
|
||||
b.iter(|| harness::build(&spec).expect("harness build warm"));
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn bench_sandbox_run_payload(c: &mut Criterion) {
|
||||
use nyx_scanner::dynamic::corpus::payloads_for;
|
||||
use nyx_scanner::dynamic::harness;
|
||||
use nyx_scanner::dynamic::sandbox::{self, SandboxOptions};
|
||||
|
||||
let spec = make_sqli_spec();
|
||||
let harness = harness::build(&spec).expect("harness build");
|
||||
let payloads = payloads_for(Cap::SQL_QUERY);
|
||||
let payload = payloads
|
||||
.iter()
|
||||
.find(|p| !p.is_benign)
|
||||
.expect("sqli payload");
|
||||
let opts = SandboxOptions {
|
||||
timeout: std::time::Duration::from_secs(10),
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
|
||||
c.bench_function("sandbox_run_payload", |b| {
|
||||
b.iter(|| sandbox::run(&harness, payload.bytes, &opts).expect("sandbox run"));
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
/// Reports whether a usable Docker installation is reachable.
///
/// Runs `docker info` with stdout and stderr discarded. Returns `true` only
/// when the process could be spawned and exited successfully; a spawn error
/// or a non-zero exit both yield `false`.
fn docker_available() -> bool {
    use std::process::{Command, Stdio};

    let probe = Command::new("docker")
        .arg("info")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();

    matches!(probe, Ok(exit) if exit.success())
}
|
||||
|
||||
/// Docker image pull benchmark, registered as `docker_image_build`.
///
/// Each iteration runs `docker pull python:3-slim` with its output discarded
/// and its exit status ignored. The pull is idempotent, so once the image is
/// present locally the remaining iterations are expected to be fast; only a
/// host without the image pays for the download from the registry.
///
/// When Docker is absent a labelled noop measurement
/// (`docker_image_build_no_docker`) is registered instead, so criterion's
/// output is never empty for this slot.
#[cfg(feature = "dynamic")]
fn bench_docker_image_build(c: &mut Criterion) {
    use std::process::{Command, Stdio};

    if docker_available() {
        c.bench_function("docker_image_build", |bencher| {
            bencher.iter(|| {
                // Best-effort: a failed pull is not treated as a bench failure.
                let _ = Command::new("docker")
                    .arg("pull")
                    .arg("python:3-slim")
                    .stdout(Stdio::null())
                    .stderr(Stdio::null())
                    .status();
            });
        });
    } else {
        c.bench_function("docker_image_build_no_docker", |bencher| bencher.iter(|| ()));
    }
}
|
||||
|
||||
/// Warm `docker exec` reuse benchmark.
///
/// Starts one long-lived `python:3-slim` container before the benchmark loop
/// and measures the cost of each `docker exec` call against it (no cold-start
/// amortisation visible here — that is visible by comparing this vs
/// `bench_docker_payload_cost`).
///
/// Registers a labelled noop measurement (`docker_exec_warm_no_docker`) when
/// Docker is absent, matching the other docker benches. If the container
/// cannot be started, the bench is skipped with a message on stderr rather
/// than silently timing `docker exec` calls that all fail.
#[cfg(feature = "dynamic")]
fn bench_docker_exec_warm(c: &mut Criterion) {
    use std::process::{Command, Stdio};

    /// Runs `docker <args>` with output discarded; `true` only on exit status 0.
    fn docker_quiet(args: &[&str]) -> bool {
        Command::new("docker")
            .args(args)
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .map(|exit| exit.success())
            .unwrap_or(false)
    }

    if !docker_available() {
        c.bench_function("docker_exec_warm_no_docker", |b| b.iter(|| ()));
        return;
    }

    let container = "nyx-bench-exec-warm";

    // A container left behind by an interrupted run would make `docker run
    // --name` fail on the name clash; removing it is best-effort.
    let _ = docker_quiet(&["rm", "-f", container]);

    // Start a long-lived container for the benchmark.
    let started = docker_quiet(&[
        "run",
        "-d",
        "--rm",
        "--name",
        container,
        "--cap-drop=ALL",
        "--security-opt",
        "no-new-privileges:true",
        "--network",
        "none",
        "python:3-slim",
        "sleep",
        "300",
    ]);
    if !started {
        eprintln!("bench_docker_exec_warm: failed to start container, skipping");
        return;
    }

    c.bench_function("docker_exec_warm", |b| {
        b.iter(|| {
            let _ = docker_quiet(&["exec", container, "python3", "-c", "pass"]);
        });
    });

    // Best-effort teardown; `--rm` removes the container once it stops.
    let _ = docker_quiet(&["stop", container]);
}
|
||||
|
||||
/// End-to-end per-payload cost through the docker sandbox backend.
///
/// The Python SQLi harness is built once, then every iteration sends the
/// first non-benign `Cap::SQL_QUERY` corpus payload through `sandbox::run`
/// with `SandboxBackend::Docker` and a 30-second timeout. The first call
/// includes container start; subsequent calls show exec-reuse cost.
///
/// When Docker is absent a labelled noop measurement
/// (`docker_payload_cost_no_docker`) is registered instead, so criterion's
/// output is never empty for this slot.
#[cfg(feature = "dynamic")]
fn bench_docker_payload_cost(c: &mut Criterion) {
    use nyx_scanner::dynamic::corpus::payloads_for;
    use nyx_scanner::dynamic::harness;
    use nyx_scanner::dynamic::sandbox::{self, SandboxBackend, SandboxOptions};
    use std::time::Duration;

    if !docker_available() {
        c.bench_function("docker_payload_cost_no_docker", |bencher| bencher.iter(|| ()));
        return;
    }

    let spec = make_sqli_spec();
    let built = harness::build(&spec).expect("harness build");

    let corpus = payloads_for(Cap::SQL_QUERY);
    let attack = corpus
        .iter()
        .find(|candidate| !candidate.is_benign)
        .expect("sqli payload");

    let docker_opts = SandboxOptions {
        timeout: Duration::from_secs(30),
        backend: SandboxBackend::Docker,
        ..SandboxOptions::default()
    };

    c.bench_function("docker_payload_cost", |bencher| {
        bencher.iter(|| {
            // The run result is discarded: a failing run is not a bench failure here.
            let _ = sandbox::run(&built, attack.bytes, &docker_opts);
        });
    });
}
|
||||
|
||||
/// Rust harness build (source gen + disk write, no compilation).
///
/// Measures only `harness::build()` — staging files to the workdir.
/// The expensive `cargo build --release` step is NOT included here
/// (that is the province of an integration benchmark, not this microbench).
///
/// The previous workdir is deleted in the untimed `iter_batched` setup
/// phase, so the reported time covers the build only. Panics if
/// `harness::build` returns an error.
#[cfg(feature = "dynamic")]
fn bench_rust_harness_build_cold(c: &mut Criterion) {
    use nyx_scanner::dynamic::harness;

    let spec = make_rust_sqli_spec();
    // NOTE(review): assumes `harness::build` stages into
    // `<temp_dir>/nyx-harness/<spec_hash>` — confirm against the harness module.
    let workdir = std::env::temp_dir()
        .join("nyx-harness")
        .join(&spec.spec_hash);

    c.bench_function("rust_harness_build_cold", |b| {
        b.iter_batched(
            || {
                // Best-effort: the directory does not exist on a first run.
                let _ = std::fs::remove_dir_all(&workdir);
            },
            |()| harness::build(&spec).expect("harness build"),
            // Each build needs its own freshly-removed workdir.
            criterion::BatchSize::PerIteration,
        );
    });
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn make_js_sqli_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "bench_js_0001".into(),
|
||||
entry_file: "tests/dynamic_fixtures/js/sqli_positive.js".into(),
|
||||
entry_name: "login".into(),
|
||||
entry_kind: nyx_scanner::dynamic::spec::EntryKind::Function,
|
||||
lang: Lang::JavaScript,
|
||||
toolchain_id: "node-20".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "tests/dynamic_fixtures/js/sqli_positive.js".into(),
|
||||
sink_line: 8,
|
||||
spec_hash: "benchjssqli000001".into(),
|
||||
derivation: SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
java_toolchain: JavaToolchain::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn make_go_sqli_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "bench_go_0001".into(),
|
||||
entry_file: "tests/dynamic_fixtures/go/sqli_positive.go".into(),
|
||||
entry_name: "Login".into(),
|
||||
entry_kind: nyx_scanner::dynamic::spec::EntryKind::Function,
|
||||
lang: Lang::Go,
|
||||
toolchain_id: "go-1.21".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "tests/dynamic_fixtures/go/sqli_positive.go".into(),
|
||||
sink_line: 12,
|
||||
spec_hash: "benchgosqli000001".into(),
|
||||
derivation: SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
java_toolchain: JavaToolchain::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn make_java_sqli_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "bench_java_0001".into(),
|
||||
entry_file: "tests/dynamic_fixtures/java/sqli_positive.java".into(),
|
||||
entry_name: "login".into(),
|
||||
entry_kind: nyx_scanner::dynamic::spec::EntryKind::Function,
|
||||
lang: Lang::Java,
|
||||
toolchain_id: "java-21".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "tests/dynamic_fixtures/java/sqli_positive.java".into(),
|
||||
sink_line: 9,
|
||||
spec_hash: "benchjavasqli00001".into(),
|
||||
derivation: SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
java_toolchain: JavaToolchain::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn make_php_sqli_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "bench_php_0001".into(),
|
||||
entry_file: "tests/dynamic_fixtures/php/sqli_positive.php".into(),
|
||||
entry_name: "login".into(),
|
||||
entry_kind: nyx_scanner::dynamic::spec::EntryKind::Function,
|
||||
lang: Lang::Php,
|
||||
toolchain_id: "php-8".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "tests/dynamic_fixtures/php/sqli_positive.php".into(),
|
||||
sink_line: 9,
|
||||
spec_hash: "benchphpsqli000001".into(),
|
||||
derivation: SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
java_toolchain: JavaToolchain::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// JS harness build (source gen + disk write).
///
/// The previous workdir is deleted in the untimed `iter_batched` setup
/// phase, so the reported time covers `harness::build` only and not the
/// teardown of the previous iteration's directory. Panics if
/// `harness::build` returns an error.
#[cfg(feature = "dynamic")]
fn bench_js_harness_build_cold(c: &mut Criterion) {
    use nyx_scanner::dynamic::harness;

    let spec = make_js_sqli_spec();
    // NOTE(review): assumes `harness::build` stages into
    // `<temp_dir>/nyx-harness/<spec_hash>` — confirm against the harness module.
    let workdir = std::env::temp_dir()
        .join("nyx-harness")
        .join(&spec.spec_hash);

    c.bench_function("js_harness_build_cold", |b| {
        b.iter_batched(
            || {
                // Best-effort: the directory does not exist on a first run.
                let _ = std::fs::remove_dir_all(&workdir);
            },
            |()| harness::build(&spec).expect("JS harness build"),
            // Each build needs its own freshly-removed workdir.
            criterion::BatchSize::PerIteration,
        );
    });
}
|
||||
|
||||
/// Go harness build (source gen + disk write, no compilation).
///
/// The previous workdir is deleted in the untimed `iter_batched` setup
/// phase, so the reported time covers `harness::build` only and not the
/// teardown of the previous iteration's directory. Panics if
/// `harness::build` returns an error.
#[cfg(feature = "dynamic")]
fn bench_go_harness_build_cold(c: &mut Criterion) {
    use nyx_scanner::dynamic::harness;

    let spec = make_go_sqli_spec();
    // NOTE(review): assumes `harness::build` stages into
    // `<temp_dir>/nyx-harness/<spec_hash>` — confirm against the harness module.
    let workdir = std::env::temp_dir()
        .join("nyx-harness")
        .join(&spec.spec_hash);

    c.bench_function("go_harness_build_cold", |b| {
        b.iter_batched(
            || {
                // Best-effort: the directory does not exist on a first run.
                let _ = std::fs::remove_dir_all(&workdir);
            },
            |()| harness::build(&spec).expect("Go harness build"),
            // Each build needs its own freshly-removed workdir.
            criterion::BatchSize::PerIteration,
        );
    });
}
|
||||
|
||||
/// Java harness build (source gen + disk write, no compilation).
///
/// The previous workdir is deleted in the untimed `iter_batched` setup
/// phase, so the reported time covers `harness::build` only and not the
/// teardown of the previous iteration's directory. Panics if
/// `harness::build` returns an error.
#[cfg(feature = "dynamic")]
fn bench_java_harness_build_cold(c: &mut Criterion) {
    use nyx_scanner::dynamic::harness;

    let spec = make_java_sqli_spec();
    // NOTE(review): assumes `harness::build` stages into
    // `<temp_dir>/nyx-harness/<spec_hash>` — confirm against the harness module.
    let workdir = std::env::temp_dir()
        .join("nyx-harness")
        .join(&spec.spec_hash);

    c.bench_function("java_harness_build_cold", |b| {
        b.iter_batched(
            || {
                // Best-effort: the directory does not exist on a first run.
                let _ = std::fs::remove_dir_all(&workdir);
            },
            |()| harness::build(&spec).expect("Java harness build"),
            // Each build needs its own freshly-removed workdir.
            criterion::BatchSize::PerIteration,
        );
    });
}
|
||||
|
||||
/// PHP harness build (source gen + disk write).
///
/// The previous workdir is deleted in the untimed `iter_batched` setup
/// phase, so the reported time covers `harness::build` only and not the
/// teardown of the previous iteration's directory. Panics if
/// `harness::build` returns an error.
#[cfg(feature = "dynamic")]
fn bench_php_harness_build_cold(c: &mut Criterion) {
    use nyx_scanner::dynamic::harness;

    let spec = make_php_sqli_spec();
    // NOTE(review): assumes `harness::build` stages into
    // `<temp_dir>/nyx-harness/<spec_hash>` — confirm against the harness module.
    let workdir = std::env::temp_dir()
        .join("nyx-harness")
        .join(&spec.spec_hash);

    c.bench_function("php_harness_build_cold", |b| {
        b.iter_batched(
            || {
                // Best-effort: the directory does not exist on a first run.
                let _ = std::fs::remove_dir_all(&workdir);
            },
            |()| harness::build(&spec).expect("PHP harness build"),
            // Each build needs its own freshly-removed workdir.
            criterion::BatchSize::PerIteration,
        );
    });
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
/// Builds one synthetic chain member for the reverify benches.
///
/// `hash` becomes the member's `stable_hash`; `idx` is embedded in the
/// finding id and places the member on line `idx + 1`, column 1 of
/// `bench/synthetic.py`. Capability bits are left at zero.
fn mk_chain_member(hash: u64, idx: usize) -> nyx_scanner::chain::FindingRef {
    use nyx_scanner::chain::FindingRef;
    use nyx_scanner::surface::SourceLocation;

    let line = idx as u32 + 1;
    let location = SourceLocation::new("bench/synthetic.py", line, 1);

    FindingRef {
        finding_id: format!("bench-chain-member-{idx}"),
        stable_hash: hash,
        location,
        rule_id: "taint-unsanitised-flow".into(),
        cap_bits: 0,
    }
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
fn mk_synthetic_chain(hash: u64, members: usize) -> nyx_scanner::chain::ChainFinding {
|
||||
use nyx_scanner::chain::{ChainFinding, ChainSeverity, ChainSink, ImpactCategory};
|
||||
ChainFinding {
|
||||
stable_hash: hash,
|
||||
members: (0..members)
|
||||
.map(|i| mk_chain_member(hash.wrapping_add(i as u64 + 1), i))
|
||||
.collect(),
|
||||
sink: ChainSink {
|
||||
file: "bench/synthetic.py".into(),
|
||||
line: 99,
|
||||
col: 1,
|
||||
function_name: "sink".into(),
|
||||
cap_bits: 0,
|
||||
},
|
||||
implied_impact: ImpactCategory::Rce,
|
||||
severity: ChainSeverity::Critical,
|
||||
score: 100.0,
|
||||
dynamic_verdict: None,
|
||||
reverify_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
/// Stub reverifier for the benches: its `CompositeReverifier::reverify`
/// impl always returns a `VerifyResult` with status `Confirmed`.
struct BenchConfirmedReverifier;
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
impl nyx_scanner::chain::CompositeReverifier for BenchConfirmedReverifier {
    /// Returns a canned `Confirmed` result without looking at the chain,
    /// the member diags, or the surface map.
    ///
    /// Only `opts.replay_stable_check` is consulted: when it is set, the
    /// result carries `replay_stable = Some(true)`, otherwise `None`.
    fn reverify(
        &self,
        _chain: &nyx_scanner::chain::ChainFinding,
        _member_diags: &[nyx_scanner::commands::scan::Diag],
        _surface: &nyx_scanner::surface::SurfaceMap,
        opts: &nyx_scanner::dynamic::verify::VerifyOptions,
    ) -> nyx_scanner::evidence::VerifyResult {
        use nyx_scanner::evidence::{VerifyResult, VerifyStatus};

        // Follows the replay-stable stamping shape of
        // `DefaultCompositeReverifier::reverify`, so the apply-verdict
        // allocation cost matches the live path when the opt-in is on.
        // There is nothing to re-run in this stub; stamping `Some(true)`
        // only makes downstream sites that branch on the field take the
        // same path as a real Confirmed-with-stable run.
        let replay_stable = match opts.replay_stable_check {
            true => Some(true),
            false => None,
        };

        VerifyResult {
            finding_id: "bench".into(),
            status: VerifyStatus::Confirmed,
            triggered_payload: None,
            reason: None,
            inconclusive_reason: None,
            detail: None,
            attempts: Vec::new(),
            toolchain_match: None,
            differential: None,
            replay_stable,
            wrong: None,
            hardening_outcome: None,
        }
    }
}
|
||||
|
||||
/// Phase 26 dispatch-cost anchor (`composite_chain_reverify_dispatch`).
///
/// Feeds one synthetic 3-member chain with no member diags to
/// `reverify_top_chains` (`top_n = 1`, default options, empty surface map).
/// With nothing to derive, the reverifier walks chain_step_specs (3 HashMap
/// misses → 3 NoFlowSteps errors), the build and run loops both see zero
/// work and exit early, and the composed VerifyResult is applied via
/// `apply_dynamic_verdict` (Inconclusive → severity downgrade).
///
/// This is the no-toolchain-dep dispatch overhead — a regression here
/// signals a hot-path allocation introduced into the reverify pipeline.
#[cfg(feature = "dynamic")]
fn bench_composite_chain_reverify_dispatch(c: &mut Criterion) {
    use nyx_scanner::chain::reverify::reverify_top_chains;
    use nyx_scanner::dynamic::verify::VerifyOptions;
    use nyx_scanner::surface::SurfaceMap;

    let empty_surface = SurfaceMap::new();
    let default_opts = VerifyOptions::default();

    c.bench_function("composite_chain_reverify_dispatch", |bencher| {
        bencher.iter(|| {
            // A fresh chain per iteration: the reverify call takes it mutably.
            let mut chains = [mk_synthetic_chain(0xC1A1, 3)];
            let _ = reverify_top_chains(&mut chains, &[], &empty_surface, &default_opts, 1);
        });
    });
}
|
||||
|
||||
/// Phase 26 stub-reverifier happy-path anchor
/// (`composite_chain_reverify_stub_confirmed`).
///
/// Drives one synthetic 3-member chain through `reverify_top_chains_with`
/// using `BenchConfirmedReverifier`, which always answers `Confirmed`.
/// Because that verdict does NOT trigger a severity downgrade, this isolates
/// the `ChainReverifyResult` allocation + `chain.apply_dynamic_verdict`
/// transition cost from the verdict-side allocation measured by the
/// dispatch bench.
#[cfg(feature = "dynamic")]
fn bench_composite_chain_reverify_stub_confirmed(c: &mut Criterion) {
    use nyx_scanner::chain::reverify::reverify_top_chains_with;
    use nyx_scanner::dynamic::verify::VerifyOptions;
    use nyx_scanner::surface::SurfaceMap;

    let empty_surface = SurfaceMap::new();
    let default_opts = VerifyOptions::default();
    let stub = BenchConfirmedReverifier;

    c.bench_function("composite_chain_reverify_stub_confirmed", |bencher| {
        bencher.iter(|| {
            // A fresh chain per iteration: the reverify call takes it mutably.
            let mut chains = [mk_synthetic_chain(0xC2A2, 3)];
            let _ = reverify_top_chains_with(
                &mut chains,
                &[],
                &empty_surface,
                &default_opts,
                1,
                &stub,
            );
        });
    });
}
|
||||
|
||||
/// Phase 26 top-N slice anchor (`composite_chain_reverify_top_n_slice`).
///
/// Passes a 5-chain slice with `top_n = 3` to `reverify_top_chains_with`
/// and the always-`Confirmed` stub. The expected cost is the per-chain
/// dispatch cost times three, so (by way of regression) a change that drops
/// the `bound = top_n.min(chains.len())` cap and walks past the top-N
/// prefix would show up as a ~5/3 increase in this bench.
#[cfg(feature = "dynamic")]
fn bench_composite_chain_reverify_top_n_slice(c: &mut Criterion) {
    use nyx_scanner::chain::ChainFinding;
    use nyx_scanner::chain::reverify::reverify_top_chains_with;
    use nyx_scanner::dynamic::verify::VerifyOptions;
    use nyx_scanner::surface::SurfaceMap;

    let empty_surface = SurfaceMap::new();
    let default_opts = VerifyOptions::default();
    let stub = BenchConfirmedReverifier;

    c.bench_function("composite_chain_reverify_top_n_slice", |bencher| {
        bencher.iter(|| {
            // Five chains in the slice, of which only the first three are in scope.
            let mut chains: [ChainFinding; 5] = [
                mk_synthetic_chain(0xC301, 3),
                mk_synthetic_chain(0xC302, 3),
                mk_synthetic_chain(0xC303, 3),
                mk_synthetic_chain(0xC304, 3),
                mk_synthetic_chain(0xC305, 3),
            ];
            let _ = reverify_top_chains_with(
                &mut chains,
                &[],
                &empty_surface,
                &default_opts,
                3,
                &stub,
            );
        });
    });
}
|
||||
|
||||
/// Phase 26 replay-stable anchor (`composite_chain_reverify_replay_stable`).
///
/// Same synthetic 3-member chain and stub as the `stub_confirmed` bench,
/// but with `VerifyOptions::replay_stable_check = true`. The
/// `BenchConfirmedReverifier` stub honours the opt-in by stamping
/// `replay_stable = Some(true)` on its `VerifyResult`, so the apply-verdict
/// path runs with the telemetry stability field populated.
///
/// Purpose: anchor the cost of the replay-stable apply path so a regression
/// that leaks a real `run_chain_steps` invocation into the stubbed verifier
/// layer (or that allocates extra state behind the `replay_stable_check`
/// toggle in `chain::reverify::apply_one`) shows up immediately against the
/// `stub_confirmed` baseline.
#[cfg(feature = "dynamic")]
fn bench_composite_chain_reverify_replay_stable(c: &mut Criterion) {
    use nyx_scanner::chain::reverify::reverify_top_chains_with;
    use nyx_scanner::dynamic::verify::VerifyOptions;
    use nyx_scanner::surface::SurfaceMap;

    let empty_surface = SurfaceMap::new();
    let replay_opts = VerifyOptions {
        replay_stable_check: true,
        ..VerifyOptions::default()
    };
    let stub = BenchConfirmedReverifier;

    c.bench_function("composite_chain_reverify_replay_stable", |bencher| {
        bencher.iter(|| {
            // A fresh chain per iteration: the reverify call takes it mutably.
            let mut chains = [mk_synthetic_chain(0xC4A3, 3)];
            let _ = reverify_top_chains_with(
                &mut chains,
                &[],
                &empty_surface,
                &replay_opts,
                1,
                &stub,
            );
        });
    });
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
#[allow(dead_code)]
|
||||
/// Does nothing. With the `dynamic` feature on, the real benches are
/// registered in the group and this placeholder is unused (hence the
/// `dead_code` allowance).
fn bench_noop(_c: &mut Criterion) {}
|
||||
|
||||
// When dynamic feature is off, provide a stub so the binary still links.
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
fn bench_noop(c: &mut Criterion) {
|
||||
c.bench_function("dynamic_disabled_noop", |b| b.iter(|| ()));
|
||||
}
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
// The `dynamic` bench group for builds with the feature enabled. Targets
// run in the order they are listed.
criterion_group! {
    name = dynamic;
    config = Criterion::default();
    targets =
        bench_harness_build_cold,
        bench_harness_build_warm,
        bench_sandbox_run_payload,
        bench_docker_image_build,
        bench_docker_exec_warm,
        bench_docker_payload_cost,
        bench_rust_harness_build_cold,
        bench_js_harness_build_cold,
        bench_go_harness_build_cold,
        bench_java_harness_build_cold,
        bench_php_harness_build_cold,
        bench_composite_chain_reverify_dispatch,
        bench_composite_chain_reverify_stub_confirmed,
        bench_composite_chain_reverify_top_n_slice,
        bench_composite_chain_reverify_replay_stable,
}
|
||||
|
||||
#[cfg(not(feature = "dynamic"))]
|
||||
// Feature-off variant of the `dynamic` group: only the noop target.
criterion_group! {
    name = dynamic;
    config = Criterion::default();
    targets = bench_noop
}
|
||||
|
||||
// Expands to the bench binary's `main`, which runs the `dynamic` group.
criterion_main!(dynamic);
|
||||
26
benches/dynamic_bench_baseline.json
Normal file
26
benches/dynamic_bench_baseline.json
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"schema": 1,
|
||||
"note": "ASPIRATIONAL placeholder — values were hand-typed, not captured from a real bench run. Regenerate with: benches/regen_baseline.sh (requires --features dynamic and python3 on PATH). Commit the updated file to establish a real regression reference for M3+.",
|
||||
"benchmarks": {
|
||||
"harness_build_cold": {
|
||||
"mean_ns": 800000,
|
||||
"stddev_ns": 120000,
|
||||
"description": "Fresh workdir; spec → BuiltHarness including source gen + disk write."
|
||||
},
|
||||
"harness_build_warm": {
|
||||
"mean_ns": 180000,
|
||||
"stddev_ns": 30000,
|
||||
"description": "Workdir already staged; file write skipped by dst.exists() guard."
|
||||
},
|
||||
"sandbox_run_payload": {
|
||||
"mean_ns": 120000000,
|
||||
"stddev_ns": 15000000,
|
||||
"description": "Single process-backend run with sqli payload; includes python3 startup + settrace."
|
||||
}
|
||||
},
|
||||
"regression_thresholds": {
|
||||
"harness_build_cold": 2.0,
|
||||
"harness_build_warm": 2.0,
|
||||
"sandbox_run_payload": 1.5
|
||||
}
|
||||
}
|
||||
84
benches/regen_baseline.sh
Executable file
84
benches/regen_baseline.sh
Executable file
|
|
@ -0,0 +1,84 @@
|
|||
#!/usr/bin/env bash
|
||||
# Regenerate benches/dynamic_bench_baseline.json from a real cargo bench run.
|
||||
#
|
||||
# Usage:
|
||||
# bash benches/regen_baseline.sh
|
||||
#
|
||||
# Requirements:
|
||||
# - python3 on PATH
|
||||
# - cargo (nightly or stable with edition 2024)
|
||||
# - Criterion's JSON output (criterion feature already in dev-deps)
|
||||
#
|
||||
# The script runs the dynamic bench group, parses Criterion's estimates JSON,
|
||||
# and overwrites dynamic_bench_baseline.json with real numbers.
|
||||
|
||||
set -euo pipefail

# Resolve paths relative to this script so it can be run from any directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BASELINE_FILE="${SCRIPT_DIR}/dynamic_bench_baseline.json"

# Select the bench target with --bench. Arguments after `--` are a Criterion
# benchmark-id filter, and none of the registered ids contains "dynamic", so
# `-- dynamic` would run nothing and leave no estimates to parse.
echo "Running cargo bench --features dynamic --bench dynamic_bench ..."
cargo bench --manifest-path "${REPO_ROOT}/Cargo.toml" \
    --features dynamic \
    --bench dynamic_bench \
    2>&1 | tee /tmp/nyx_bench_raw.txt
|
||||
|
||||
# Criterion writes the latest estimates to target/criterion/<benchmark id>/new/estimates.json.
|
||||
# Extract mean_ns for each tracked benchmark.
|
||||
# Print "<mean_ns> <stddev_ns>" (integers) for one Criterion estimates.json.
#
#   $1  path to the estimates.json file
#
# A missing file is not fatal: "0 0" is printed so the caller can still write
# a baseline, and a warning goes to stderr so the zeros are not mistaken for
# real measurements.
extract_ns() {
    local path="$1"
    if [[ ! -f "${path}" ]]; then
        echo "warning: ${path} not found; recording 0 0" >&2
        echo "0 0"
        return 0
    fi
    # The path is passed as an argument instead of being pasted into the
    # Python source, so quotes or backslashes in it cannot break the snippet.
    python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    d = json.load(fh)
mean = d["mean"]["point_estimate"]
stddev = d["std_dev"]["point_estimate"] if "std_dev" in d else 0
print(int(mean), int(stddev))
' "${path}"
}
|
||||
|
||||
TARGET="${REPO_ROOT}/target/criterion"

# Criterion stores the most recent run of each benchmark id under `new/`.
read -r COLD_MEAN COLD_STDDEV < <(extract_ns "${TARGET}/harness_build_cold/new/estimates.json")
read -r WARM_MEAN WARM_STDDEV < <(extract_ns "${TARGET}/harness_build_warm/new/estimates.json")
read -r RUN_MEAN RUN_STDDEV < <(extract_ns "${TARGET}/sandbox_run_payload/new/estimates.json")

# Provenance recorded in the baseline's "note" field.
MACHINE="$(uname -m) / $(uname -s)"
NYX_VER="$(cargo metadata --manifest-path "${REPO_ROOT}/Cargo.toml" --no-deps --format-version 1 \
    | python3 -c "import json,sys; d=json.load(sys.stdin); print(next(p['version'] for p in d['packages'] if p['name']=='nyx-scanner'))")"
DATE="$(date +%Y-%m-%d)"

cat > "${BASELINE_FILE}" <<EOF
{
  "schema": 1,
  "note": "Baseline captured on ${MACHINE}, nyx v${NYX_VER}, ${DATE}. Regenerate with: benches/regen_baseline.sh",
  "benchmarks": {
    "harness_build_cold": {
      "mean_ns": ${COLD_MEAN},
      "stddev_ns": ${COLD_STDDEV},
      "description": "Fresh workdir; spec → BuiltHarness including source gen + disk write."
    },
    "harness_build_warm": {
      "mean_ns": ${WARM_MEAN},
      "stddev_ns": ${WARM_STDDEV},
      "description": "Workdir already staged; file write skipped by dst.exists() guard."
    },
    "sandbox_run_payload": {
      "mean_ns": ${RUN_MEAN},
      "stddev_ns": ${RUN_STDDEV},
      "description": "Single process-backend run with sqli payload; includes python3 startup + settrace."
    }
  },
  "regression_thresholds": {
    "harness_build_cold": 2.0,
    "harness_build_warm": 2.0,
    "sandbox_run_payload": 1.5
  }
}
EOF

echo "Updated ${BASELINE_FILE}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue