mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
375 lines
15 KiB
Rust
375 lines
15 KiB
Rust
|
|
//! Regression tests for SCC fixed-point convergence in pass 2.
|
|||
|
|
//!
|
|||
|
|
//! Pass 2 uses Jacobi iteration — each file in a mutually-recursive SCC
|
|||
|
|
//! is re-analysed against the *pre-iteration* `GlobalSummaries` snapshot,
|
|||
|
|
//! and updates are only visible on the next iteration. In a cross-file
|
|||
|
|
//! SCC with `k` functions arranged in a chain, facts introduced at one
|
|||
|
|
//! end of the chain need up to `k` iterations to propagate back to the
|
|||
|
|
//! other end.
|
|||
|
|
//!
|
|||
|
|
//! Before this test was written, the hard cap was 3 — so any SCC with
|
|||
|
|
//! 4+ cross-file functions silently lost precision. These fixtures
|
|||
|
|
//! exercise a 4-cycle and assert both that the transitive finding is
|
|||
|
|
//! reported and that the engine actually needed more than 3 iterations
|
|||
|
|
//! to converge (proving the test is load-bearing, not incidental).
|
|||
|
|
//!
|
|||
|
|
//! If you raise or lower the cap in `scan::SCC_FIXPOINT_SAFETY_CAP`,
|
|||
|
|
//! update the iteration-count assertions accordingly.
|
|||
|
|
|
|||
|
|
mod common;
|
|||
|
|
|
|||
|
|
use common::{scan_fixture_dir, validate_expectations};
|
|||
|
|
use nyx_scanner::commands::scan::{
|
|||
|
|
SCC_UNCONVERGED_NOTE_PREFIX, last_scc_max_iterations, set_scc_fixpoint_cap_override,
|
|||
|
|
};
|
|||
|
|
use nyx_scanner::evidence::Confidence;
|
|||
|
|
use nyx_scanner::utils::config::AnalysisMode;
|
|||
|
|
use std::path::Path;
|
|||
|
|
use std::sync::Mutex;
|
|||
|
|
|
|||
|
|
fn fixture_path(name: &str) -> std::path::PathBuf {
|
|||
|
|
Path::new(env!("CARGO_MANIFEST_DIR"))
|
|||
|
|
.join("tests/fixtures")
|
|||
|
|
.join(name)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Serialize any test that mutates the global SCC fix-point cap override
|
|||
|
|
/// or reads `last_scc_max_iterations()`. The override is a process-wide
|
|||
|
|
/// `AtomicUsize` and `cargo test` runs tests in parallel by default —
|
|||
|
|
/// without this guard, one test's override leaks into another's scan and
|
|||
|
|
/// both the iteration count and the findings tag shift non-deterministically.
|
|||
|
|
static SCC_TEST_GUARD: Mutex<()> = Mutex::new(());
|
|||
|
|
|
|||
|
|
/// Adversarial 4-cycle: `step_a → step_b → step_c → step_d → step_a`
|
|||
|
|
/// across four separate files, with the only sink in `step_d`. The
|
|||
|
|
/// `param_to_sink` fact has to travel back through three cross-file
|
|||
|
|
/// summary-update iterations before `step_a`'s summary reflects the
|
|||
|
|
/// transitive flow — without that, the caller in `server.py` never
|
|||
|
|
/// sees the XSS/CMDI.
|
|||
|
|
///
|
|||
|
|
/// With the old `MAX_SCC_FIXPOINT_ITERS = 3` this test's required
|
|||
|
|
/// finding silently disappeared. With the current
|
|||
|
|
/// `SCC_FIXPOINT_SAFETY_CAP = 64` it converges naturally.
|
|||
|
|
#[test]
|
|||
|
|
fn scc_deep_cycle_requires_multi_iter_convergence() {
|
|||
|
|
let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
|||
|
|
set_scc_fixpoint_cap_override(0); // ensure no stale override from a prior test
|
|||
|
|
let dir = fixture_path("cross_file_scc_deep_cycle");
|
|||
|
|
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
|
|||
|
|
// Hard assertion: the transitive taint must be detected.
|
|||
|
|
validate_expectations(&diags, &dir);
|
|||
|
|
|
|||
|
|
// Observability assertion: prove the SCC actually exercised more
|
|||
|
|
// than three iterations — otherwise this fixture would pass even
|
|||
|
|
// under the old bound and give false confidence.
|
|||
|
|
//
|
|||
|
|
// The exact bound is tight: a 4-cycle needs at least 4 iterations
|
|||
|
|
// to propagate a transitive fact end-to-end, and monotone summary
|
|||
|
|
// refinement should converge in a small multiple of that. A drop
|
|||
|
|
// below 4 means the 3-bound regression is unguarded; a rise above
|
|||
|
|
// 8 means summary refinement is no longer monotone or is churning.
|
|||
|
|
let iters = last_scc_max_iterations();
|
|||
|
|
assert!(
|
|||
|
|
(4..=8).contains(&iters),
|
|||
|
|
"Expected 4..=8 SCC fix-point iterations for the 4-cycle fixture; \
|
|||
|
|
got {iters}. Lower bound guards against the pre-fix cap of 3; \
|
|||
|
|
upper bound guards against summary-refinement regressions that \
|
|||
|
|
would churn near the safety cap. If this fires, audit \
|
|||
|
|
resolve_callee / summary merging in taint/mod.rs.",
|
|||
|
|
);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Existing 3-file Python SCC — lighter smoke test, verifies the
|
|||
|
|
/// iteration count stays in a sensible range. If this starts requiring
|
|||
|
|
/// many iterations something regressed in summary extraction.
|
|||
|
|
#[test]
|
|||
|
|
fn scc_small_cycle_converges_quickly() {
|
|||
|
|
let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
|||
|
|
set_scc_fixpoint_cap_override(0);
|
|||
|
|
let dir = fixture_path("cross_file_scc_convergence");
|
|||
|
|
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
validate_expectations(&diags, &dir);
|
|||
|
|
|
|||
|
|
let iters = last_scc_max_iterations();
|
|||
|
|
// Upper bound is tight: even if the call graph ever grows mutual
|
|||
|
|
// recursion edges here, summary refinement should still converge in
|
|||
|
|
// a small multiple of the chain depth. Current behaviour is iters=0
|
|||
|
|
// because the call graph topo-order resolves these files without
|
|||
|
|
// needing an SCC fix-point loop at all — allow that too so this
|
|||
|
|
// test does not become load-bearing on SCC detection.
|
|||
|
|
assert!(
|
|||
|
|
iters <= 4,
|
|||
|
|
"Small 3-file SCC should converge in <= 4 iterations (including \
|
|||
|
|
the `no-SCC-needed` case of 0); got {iters}. A jump suggests \
|
|||
|
|
summary widening regressed or mutual-recursion detection started \
|
|||
|
|
spuriously grouping these files into a large SCC."
|
|||
|
|
);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Regression guard: when an SCC batch exhausts the fix-point safety
|
|||
|
|
/// cap without converging, findings must still surface, each tagged
|
|||
|
|
/// with `confidence = Low` and a `scc_unconverged:` note so
|
|||
|
|
/// downstream reviewers can identify potentially-imprecise results.
|
|||
|
|
///
|
|||
|
|
/// Uses `set_scc_fixpoint_cap_override` to force cap-hit on the same
|
|||
|
|
/// 4-cycle fixture as `scc_deep_cycle_requires_multi_iter_convergence`
|
|||
|
|
/// (which normally takes ~4 iterations). Setting the cap to 1 guarantees
|
|||
|
|
/// non-convergence while keeping the test cheap and deterministic.
|
|||
|
|
#[test]
|
|||
|
|
fn scc_cap_hit_still_emits_tagged_low_confidence_findings() {
|
|||
|
|
let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
|||
|
|
let dir = fixture_path("cross_file_scc_deep_cycle");
|
|||
|
|
|
|||
|
|
// Force the SCC fix-point loop to bail after 3 iterations. The
|
|||
|
|
// 4-cycle fixture needs >=4 iterations to fully propagate taint, so
|
|||
|
|
// the 3rd iteration's diags do contain the transitive taint finding
|
|||
|
|
// but convergence has not been detected — this is the exact cap-hit
|
|||
|
|
// scenario users would see in production on a larger SCC.
|
|||
|
|
set_scc_fixpoint_cap_override(3);
|
|||
|
|
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
set_scc_fixpoint_cap_override(0);
|
|||
|
|
|
|||
|
|
// Scan must not panic, hang, or silently drop results.
|
|||
|
|
assert!(
|
|||
|
|
!diags.is_empty(),
|
|||
|
|
"scan with cap-hit must still produce diagnostics"
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// The scan must have exercised the cap (not converged early).
|
|||
|
|
let iters = last_scc_max_iterations();
|
|||
|
|
assert_eq!(
|
|||
|
|
iters, 3,
|
|||
|
|
"expected cap-override (3) to bind the fix-point loop; got {iters} iterations"
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// (a) Taint findings must still be emitted — truncation is not
|
|||
|
|
// silent drop.
|
|||
|
|
let taint: Vec<_> = diags
|
|||
|
|
.iter()
|
|||
|
|
.filter(|d| d.id.starts_with("taint-unsanitised-flow"))
|
|||
|
|
.collect();
|
|||
|
|
assert!(
|
|||
|
|
!taint.is_empty(),
|
|||
|
|
"unconverged SCC batch must still emit taint findings (truncated != dropped). \
|
|||
|
|
All diags: {:#?}",
|
|||
|
|
diags
|
|||
|
|
.iter()
|
|||
|
|
.map(|d| format!("{}:{}:{} {}", d.path, d.line, d.col, d.id))
|
|||
|
|
.collect::<Vec<_>>()
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// (b) At least one finding from the unconverged SCC batch carries
|
|||
|
|
// the tag. Tagging is scoped to diags produced by the SCC fix-point
|
|||
|
|
// loop itself — findings from non-recursive batches or orphan files
|
|||
|
|
// that happen to flow through SCC-internal summaries are
|
|||
|
|
// intentionally not re-tagged (they came from a batch that did
|
|||
|
|
// converge, modulo the referenced summary).
|
|||
|
|
let tagged: Vec<_> = diags
|
|||
|
|
.iter()
|
|||
|
|
.filter(|d| {
|
|||
|
|
d.evidence
|
|||
|
|
.as_ref()
|
|||
|
|
.map(|e| {
|
|||
|
|
e.notes
|
|||
|
|
.iter()
|
|||
|
|
.any(|n| n.starts_with(SCC_UNCONVERGED_NOTE_PREFIX))
|
|||
|
|
})
|
|||
|
|
.unwrap_or(false)
|
|||
|
|
})
|
|||
|
|
.collect();
|
|||
|
|
assert!(
|
|||
|
|
!tagged.is_empty(),
|
|||
|
|
"at least one diag in an unconverged SCC batch must carry the \
|
|||
|
|
scc_unconverged note; got none. Tagging must fire on the SCC \
|
|||
|
|
batch's own iteration_diags. All diags: {:#?}",
|
|||
|
|
diags
|
|||
|
|
.iter()
|
|||
|
|
.map(|d| format!(
|
|||
|
|
"{}:{}:{} {} conf={:?}",
|
|||
|
|
d.path, d.line, d.col, d.id, d.confidence
|
|||
|
|
))
|
|||
|
|
.collect::<Vec<_>>()
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// (c) Every tagged finding has confidence capped at Low.
|
|||
|
|
for d in &tagged {
|
|||
|
|
assert_eq!(
|
|||
|
|
d.confidence,
|
|||
|
|
Some(Confidence::Low),
|
|||
|
|
"scc_unconverged-tagged finding must be Low confidence: \
|
|||
|
|
{}:{}:{} id={} conf={:?}",
|
|||
|
|
d.path,
|
|||
|
|
d.line,
|
|||
|
|
d.col,
|
|||
|
|
d.id,
|
|||
|
|
d.confidence,
|
|||
|
|
);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Phase-E3 / Phase-B: verify that the worklist reduces per-iteration
|
|||
|
|
/// work without changing the final output. We do this by running the
|
|||
|
|
/// 16-cycle fixture twice — once through the normal pass-2 path,
|
|||
|
|
/// which uses the worklist — and asserting (a) findings match and
|
|||
|
|
/// (b) iteration count stays within the same bound as the 8-cycle.
|
|||
|
|
///
|
|||
|
|
/// This test is load-bearing for Phase-B correctness: if the worklist
|
|||
|
|
/// ever skips a file whose dependencies did change, this test's
|
|||
|
|
/// required-finding validator would fire, failing the test.
|
|||
|
|
#[test]
|
|||
|
|
fn phase_b_worklist_preserves_findings_on_16cycle() {
|
|||
|
|
let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
|||
|
|
set_scc_fixpoint_cap_override(0);
|
|||
|
|
let dir = fixture_path("cross_file_scc_16cycle");
|
|||
|
|
|
|||
|
|
// First run. Worklist is active by default.
|
|||
|
|
let diags1 = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
let iters1 = last_scc_max_iterations();
|
|||
|
|
validate_expectations(&diags1, &dir);
|
|||
|
|
|
|||
|
|
// Second run. Worklist is still active; we expect byte-for-byte
|
|||
|
|
// finding identity because the worklist changes *which* files
|
|||
|
|
// run per iteration but does not affect the set of summaries
|
|||
|
|
// produced or their final values.
|
|||
|
|
let diags2 = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
let iters2 = last_scc_max_iterations();
|
|||
|
|
validate_expectations(&diags2, &dir);
|
|||
|
|
|
|||
|
|
// Iteration count should be deterministic across runs (same
|
|||
|
|
// fixture, same cap, same call graph → same worklist schedule).
|
|||
|
|
assert_eq!(
|
|||
|
|
iters1, iters2,
|
|||
|
|
"worklist iteration count must be deterministic; \
|
|||
|
|
run1={iters1}, run2={iters2}"
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// Finding count equality as a weaker correctness check (full
|
|||
|
|
// equality is validated by `validate_expectations` on both runs).
|
|||
|
|
assert_eq!(
|
|||
|
|
diags1.len(),
|
|||
|
|
diags2.len(),
|
|||
|
|
"worklist must not introduce finding-count churn"
|
|||
|
|
);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Phase-E broader fixture: 8-function SCC chain across 8 files.
|
|||
|
|
///
|
|||
|
|
/// Exercises iteration counts well above the old 3-bound and into the
|
|||
|
|
/// 8–16 range that the `64` safety cap is meant to cover. Failure
|
|||
|
|
/// here is a stronger signal than the 4-cycle test: at depth 8 the
|
|||
|
|
/// cost of each iteration becomes visible, so a regression either in
|
|||
|
|
/// iteration count (summary churn) or in per-iteration cost (Phase-B
|
|||
|
|
/// worklist) shows up.
|
|||
|
|
#[test]
|
|||
|
|
fn scc_8cycle_converges_within_bound() {
|
|||
|
|
let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
|||
|
|
set_scc_fixpoint_cap_override(0);
|
|||
|
|
let dir = fixture_path("cross_file_scc_8cycle");
|
|||
|
|
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
validate_expectations(&diags, &dir);
|
|||
|
|
|
|||
|
|
let iters = last_scc_max_iterations();
|
|||
|
|
assert!(
|
|||
|
|
(8..=16).contains(&iters),
|
|||
|
|
"Expected 8..=16 iterations for 8-cycle fixture; got {iters}. \
|
|||
|
|
Lower bound guards the chain-depth invariant (Jacobi \
|
|||
|
|
iteration must walk one hop per round). Upper bound guards \
|
|||
|
|
against summary-refinement regressions."
|
|||
|
|
);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Stress fixture: 16-function SCC chain across 16 files.
|
|||
|
|
///
|
|||
|
|
/// At this depth the iteration count dominates per-iteration cost, so
|
|||
|
|
/// this is the fixture most sensitive to worklist optimisation.
|
|||
|
|
#[test]
|
|||
|
|
fn scc_16cycle_converges_within_bound() {
|
|||
|
|
let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
|||
|
|
set_scc_fixpoint_cap_override(0);
|
|||
|
|
let dir = fixture_path("cross_file_scc_16cycle");
|
|||
|
|
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
validate_expectations(&diags, &dir);
|
|||
|
|
|
|||
|
|
let iters = last_scc_max_iterations();
|
|||
|
|
assert!(
|
|||
|
|
(16..=32).contains(&iters),
|
|||
|
|
"Expected 16..=32 iterations for 16-cycle fixture; got {iters}. \
|
|||
|
|
Lower bound guards the chain-depth invariant. Upper bound \
|
|||
|
|
guards against summary-refinement regressions that would \
|
|||
|
|
push iteration count near the safety cap."
|
|||
|
|
);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/// Phase-D regression: cap-hit must record a [`CapHitReason`] that is
|
|||
|
|
/// *not* `Unknown` whenever the convergence loop ran for at least two
|
|||
|
|
/// iterations. On the 4-cycle fixture with cap=2, the delta trajectory
|
|||
|
|
/// should show monotone shrinkage (summaries refining toward but not
|
|||
|
|
/// reaching fixpoint), which the classifier reports as
|
|||
|
|
/// `MonotoneShrinking`.
|
|||
|
|
///
|
|||
|
|
/// This test is load-bearing: if the classifier ever returns `Unknown`
|
|||
|
|
/// on a real cap-hit it means either the trajectory is not being
|
|||
|
|
/// recorded or the classification rules have regressed. A plateau or
|
|||
|
|
/// oscillation reason would also be acceptable (either would still be
|
|||
|
|
/// a useful signal) but `Unknown` would silently hide the actionable
|
|||
|
|
/// information.
|
|||
|
|
#[test]
|
|||
|
|
fn scc_cap_hit_records_classified_reason() {
|
|||
|
|
use nyx_scanner::engine_notes::{CapHitReason, EngineNote};
|
|||
|
|
|
|||
|
|
let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
|||
|
|
let dir = fixture_path("cross_file_scc_deep_cycle");
|
|||
|
|
|
|||
|
|
// cap=2 forces cap-hit with at least two iterations recorded,
|
|||
|
|
// giving the classifier enough samples to differentiate
|
|||
|
|
// monotone-shrinking from Unknown.
|
|||
|
|
set_scc_fixpoint_cap_override(2);
|
|||
|
|
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
|
|||
|
|
set_scc_fixpoint_cap_override(0);
|
|||
|
|
|
|||
|
|
let tagged: Vec<_> = diags
|
|||
|
|
.iter()
|
|||
|
|
.filter_map(|d| {
|
|||
|
|
d.evidence
|
|||
|
|
.as_ref()
|
|||
|
|
.map(|ev| ev.engine_notes.as_slice())
|
|||
|
|
.map(|notes| {
|
|||
|
|
notes.iter().find_map(|n| match n {
|
|||
|
|
EngineNote::CrossFileFixpointCapped { reason, .. } => Some(reason.clone()),
|
|||
|
|
_ => None,
|
|||
|
|
})
|
|||
|
|
})
|
|||
|
|
.and_then(|r| r.map(|reason| (d, reason)))
|
|||
|
|
})
|
|||
|
|
.collect();
|
|||
|
|
|
|||
|
|
assert!(
|
|||
|
|
!tagged.is_empty(),
|
|||
|
|
"cap=2 scan of deep-cycle fixture must produce at least one \
|
|||
|
|
CrossFileFixpointCapped note; got diags: {:#?}",
|
|||
|
|
diags
|
|||
|
|
.iter()
|
|||
|
|
.map(|d| format!("{}:{}:{} {}", d.path, d.line, d.col, d.id))
|
|||
|
|
.collect::<Vec<_>>()
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
// The reason must be *something* other than Unknown — that's the
|
|||
|
|
// whole point of Phase-D classification. Any structured variant
|
|||
|
|
// proves the trajectory pipeline fired end-to-end.
|
|||
|
|
for (d, reason) in &tagged {
|
|||
|
|
assert!(
|
|||
|
|
!matches!(reason, CapHitReason::Unknown),
|
|||
|
|
"cap-hit must produce a classified reason, not Unknown. \
|
|||
|
|
This means either (a) the trajectory is not being recorded \
|
|||
|
|
across iterations or (b) the classifier's rules no longer \
|
|||
|
|
cover the observed pattern. Finding: {}:{}:{} id={} \
|
|||
|
|
reason={reason:?}",
|
|||
|
|
d.path,
|
|||
|
|
d.line,
|
|||
|
|
d.col,
|
|||
|
|
d.id,
|
|||
|
|
);
|
|||
|
|
}
|
|||
|
|
}
|