nyx/tests/scc_convergence_tests.rs
Eli Peter a438886217
Python fp and docs updtes (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-04-29 19:53:34 -04:00

374 lines
15 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Regression tests for SCC fixed-point convergence in pass 2.
//!
//! Pass 2 uses Jacobi iteration: each file in a mutually-recursive SCC
//! is re-analysed against the *pre-iteration* `GlobalSummaries` snapshot,
//! and updates are only visible on the next iteration. In a cross-file
//! SCC with `k` functions arranged in a chain, facts introduced at one
//! end of the chain need up to `k` iterations to propagate back to the
//! other end.
//!
//! Before this test was written, the hard cap was 3, so any SCC with
//! 4+ cross-file functions silently lost precision. These fixtures
//! exercise a 4-cycle and assert both that the transitive finding is
//! reported and that the engine actually needed more than 3 iterations
//! to converge (proving the test is load-bearing, not incidental).
//!
//! If you raise or lower the cap in `scan::SCC_FIXPOINT_SAFETY_CAP`,
//! update the iteration-count assertions accordingly.
mod common;
use common::{scan_fixture_dir, validate_expectations};
use nyx_scanner::commands::scan::{
SCC_UNCONVERGED_NOTE_PREFIX, last_scc_max_iterations, set_scc_fixpoint_cap_override,
};
use nyx_scanner::evidence::Confidence;
use nyx_scanner::utils::config::AnalysisMode;
use std::path::Path;
use std::sync::Mutex;
fn fixture_path(name: &str) -> std::path::PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/fixtures")
.join(name)
}
/// Serializes every test that mutates the global SCC fix-point cap override
/// or reads `last_scc_max_iterations()`. The override is a process-wide
/// `AtomicUsize` and `cargo test` runs tests in parallel by default;
/// without this guard, one test's override leaks into another's scan and
/// both the iteration count and the findings tag shift non-deterministically.
///
/// Tests in this file take the lock with
/// `unwrap_or_else(|e| e.into_inner())`, so a test that panics while
/// holding the guard (poisoning the mutex) does not cascade into spurious
/// lock failures in the remaining tests.
static SCC_TEST_GUARD: Mutex<()> = Mutex::new(());
/// Adversarial 4-cycle spread over four files:
/// `step_a -> step_b -> step_c -> step_d -> step_a`, where the only sink
/// lives in `step_d`.
///
/// The `param_to_sink` fact must travel back through three cross-file
/// summary-update iterations before `step_a`'s summary reflects the
/// transitive flow. Until it does, the caller in `server.py` never sees
/// the XSS/CMDI.
///
/// Under the old `MAX_SCC_FIXPOINT_ITERS = 3` the required finding
/// silently disappeared; under the current `SCC_FIXPOINT_SAFETY_CAP = 64`
/// the fixture converges naturally.
#[test]
fn scc_deep_cycle_requires_multi_iter_convergence() {
    // Hold the file-wide guard for the whole test; recover from poisoning
    // so one failed test does not take the others down with it.
    let _serial = SCC_TEST_GUARD
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    // Clear any override a previous test may have left behind.
    set_scc_fixpoint_cap_override(0);

    let fixture = fixture_path("cross_file_scc_deep_cycle");
    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);

    // Hard requirement: the transitive taint has to be reported.
    validate_expectations(&findings, &fixture);

    // Observability requirement: show that the SCC really needed more
    // than three rounds. Otherwise the fixture would also pass under the
    // old bound and the test would give false confidence.
    //
    // The window is deliberately tight. A 4-cycle needs at least 4
    // iterations to carry a transitive fact end-to-end, and monotone
    // summary refinement should settle within a small multiple of that.
    // Fewer than 4 means the 3-bound regression is unguarded; more than 8
    // means refinement is no longer monotone or is churning.
    let iters = last_scc_max_iterations();
    let within_window = matches!(iters, 4..=8);
    assert!(
        within_window,
        "Expected 4..=8 SCC fix-point iterations for the 4-cycle fixture; \
         got {iters}. Lower bound guards against the pre-fix cap of 3; \
         upper bound guards against summary-refinement regressions that \
         would churn near the safety cap. If this fires, audit \
         resolve_callee / summary merging in taint/mod.rs.",
    );
}
/// Lightweight smoke test over the existing 3-file Python SCC fixture.
///
/// Checks that the iteration count stays in a sensible range; a sudden
/// need for many iterations points at a regression in summary extraction.
#[test]
fn scc_small_cycle_converges_quickly() {
    let _serial = SCC_TEST_GUARD
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    // Start from the default cap regardless of what ran before.
    set_scc_fixpoint_cap_override(0);

    let fixture = fixture_path("cross_file_scc_convergence");
    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
    validate_expectations(&findings, &fixture);

    // The ceiling is tight on purpose: even if this call graph ever gains
    // mutual-recursion edges, summary refinement should still converge in
    // a small multiple of the chain depth. Today the observed value is 0,
    // because topological ordering of the call graph resolves these files
    // without entering the SCC fix-point loop at all. That case is
    // accepted too, so the test does not become load-bearing on SCC
    // detection.
    let iters = last_scc_max_iterations();
    let converged_quickly = iters <= 4;
    assert!(
        converged_quickly,
        "Small 3-file SCC should converge in <= 4 iterations (including \
         the `no-SCC-needed` case of 0); got {iters}. A jump suggests \
         summary widening regressed or mutual-recursion detection started \
         spuriously grouping these files into a large SCC."
    );
}
/// Regression guard: when an SCC batch exhausts the fix-point safety
/// cap without converging, findings must still surface, each tagged
/// with `confidence = Low` and a `scc_unconverged:` note so
/// downstream reviewers can identify potentially-imprecise results.
///
/// Uses `set_scc_fixpoint_cap_override` to force a cap-hit on the same
/// 4-cycle fixture as `scc_deep_cycle_requires_multi_iter_convergence`,
/// which needs at least 4 iterations to converge. Setting the cap to 3,
/// one short of that, guarantees non-convergence while keeping the test
/// cheap and deterministic.
///
/// Checks, in order:
/// 1. the scan still produces diagnostics;
/// 2. the loop ran exactly as many iterations as the cap allows;
/// 3. taint findings are still emitted;
/// 4. at least one finding carries the `SCC_UNCONVERGED_NOTE_PREFIX` note;
/// 5. every tagged finding has `Confidence::Low`.
#[test]
fn scc_cap_hit_still_emits_tagged_low_confidence_findings() {
    let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
    let dir = fixture_path("cross_file_scc_deep_cycle");

    // Force the SCC fix-point loop to bail after 3 iterations. The
    // 4-cycle fixture needs >=4 iterations to fully propagate taint, so
    // the 3rd iteration's diags do contain the transitive taint finding
    // but convergence has not been detected. This is the exact cap-hit
    // scenario users would see in production on a larger SCC.
    set_scc_fixpoint_cap_override(3);
    let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
    // Restore the default cap before asserting, so a failed assertion
    // below cannot leave the override set.
    set_scc_fixpoint_cap_override(0);

    // Scan must not panic, hang, or silently drop results.
    assert!(
        !diags.is_empty(),
        "scan with cap-hit must still produce diagnostics"
    );

    // The scan must have exercised the cap (not converged early).
    let iters = last_scc_max_iterations();
    assert_eq!(
        iters, 3,
        "expected cap-override (3) to bind the fix-point loop; got {iters} iterations"
    );

    // (a) Taint findings must still be emitted: truncation is not a
    // silent drop.
    let taint: Vec<_> = diags
        .iter()
        .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
        .collect();
    assert!(
        !taint.is_empty(),
        "unconverged SCC batch must still emit taint findings (truncated != dropped). \
         All diags: {:#?}",
        diags
            .iter()
            .map(|d| format!("{}:{}:{} {}", d.path, d.line, d.col, d.id))
            .collect::<Vec<_>>()
    );

    // (b) At least one finding from the unconverged SCC batch carries
    // the tag. Tagging is scoped to diags produced by the SCC fix-point
    // loop itself; findings from non-recursive batches or orphan files
    // that happen to flow through SCC-internal summaries are
    // intentionally not re-tagged (they came from a batch that did
    // converge, modulo the referenced summary).
    let tagged: Vec<_> = diags
        .iter()
        .filter(|d| {
            // A diag without evidence cannot carry the note.
            d.evidence.as_ref().is_some_and(|e| {
                e.notes
                    .iter()
                    .any(|n| n.starts_with(SCC_UNCONVERGED_NOTE_PREFIX))
            })
        })
        .collect();
    assert!(
        !tagged.is_empty(),
        "at least one diag in an unconverged SCC batch must carry the \
         scc_unconverged note; got none. Tagging must fire on the SCC \
         batch's own iteration_diags. All diags: {:#?}",
        diags
            .iter()
            .map(|d| format!(
                "{}:{}:{} {} conf={:?}",
                d.path, d.line, d.col, d.id, d.confidence
            ))
            .collect::<Vec<_>>()
    );

    // (c) Every tagged finding has confidence capped at Low.
    for d in &tagged {
        assert_eq!(
            d.confidence,
            Some(Confidence::Low),
            "scc_unconverged-tagged finding must be Low confidence: \
             {}:{}:{} id={} conf={:?}",
            d.path,
            d.line,
            d.col,
            d.id,
            d.confidence,
        );
    }
}
/// Phase-E3 / Phase-B: verify that the pass-2 worklist does not change
/// the final output. The 16-cycle fixture is scanned twice through the
/// normal pass-2 path (worklist active both times) and the test asserts
/// that:
///
/// (a) both runs satisfy the fixture's expectations,
/// (b) the SCC iteration count is identical across runs, and
/// (c) both runs report the same findings, compared as an
///     order-independent multiset of `path:line:col id` keys.
///
/// The absolute iteration bound for this fixture is not checked here;
/// `scc_16cycle_converges_within_bound` covers it.
///
/// This test is load-bearing for Phase-B correctness: if the worklist
/// ever skips a file whose dependencies did change, the expectations
/// validator is meant to fail the test.
#[test]
fn phase_b_worklist_preserves_findings_on_16cycle() {
    let _guard = SCC_TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
    set_scc_fixpoint_cap_override(0);
    let dir = fixture_path("cross_file_scc_16cycle");

    // First run. Worklist is active by default.
    let diags1 = scan_fixture_dir(&dir, AnalysisMode::Full);
    let iters1 = last_scc_max_iterations();
    validate_expectations(&diags1, &dir);

    // Second run. Worklist is still active; the findings are expected to
    // be identical because the worklist changes *which* files run per
    // iteration but not the set of summaries produced or their final
    // values.
    let diags2 = scan_fixture_dir(&dir, AnalysisMode::Full);
    let iters2 = last_scc_max_iterations();
    validate_expectations(&diags2, &dir);

    // Iteration count should be deterministic across runs (same
    // fixture, same cap, same call graph -> same worklist schedule).
    assert_eq!(
        iters1, iters2,
        "worklist iteration count must be deterministic; \
         run1={iters1}, run2={iters2}"
    );

    // Cheap first check: a count mismatch gives a short failure message.
    assert_eq!(
        diags1.len(),
        diags2.len(),
        "worklist must not introduce finding-count churn"
    );

    // Strong check: same findings, not just the same number of them.
    // Keys are sorted so the comparison does not depend on the order in
    // which the scanner emits diagnostics.
    let [keys1, keys2] = [&diags1, &diags2].map(|run| {
        let mut keys: Vec<String> = run
            .iter()
            .map(|d| format!("{}:{}:{} {}", d.path, d.line, d.col, d.id))
            .collect();
        keys.sort_unstable();
        keys
    });
    assert_eq!(
        keys1, keys2,
        "worklist must report the same findings on repeated scans of the same fixture"
    );
}
/// Phase-E broader fixture: an 8-function SCC chain spread over 8 files.
///
/// Pushes the iteration count well past the old 3-bound and into the
/// 8-16 range that the `64` safety cap is meant to cover. A failure here
/// is a stronger signal than the 4-cycle test: at depth 8 the cost of
/// each iteration becomes visible, so a regression in iteration count
/// (summary churn) or in per-iteration cost (Phase-B worklist) shows up.
#[test]
fn scc_8cycle_converges_within_bound() {
    let _serial = SCC_TEST_GUARD
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    // Run with the default cap, not one left over from another test.
    set_scc_fixpoint_cap_override(0);

    let fixture = fixture_path("cross_file_scc_8cycle");
    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
    validate_expectations(&findings, &fixture);

    let iters = last_scc_max_iterations();
    let within_window = matches!(iters, 8..=16);
    assert!(
        within_window,
        "Expected 8..=16 iterations for 8-cycle fixture; got {iters}. \
         Lower bound guards the chain-depth invariant (Jacobi \
         iteration must walk one hop per round). Upper bound guards \
         against summary-refinement regressions."
    );
}
/// Stress fixture: a 16-function SCC chain spread over 16 files.
///
/// At this depth the number of iterations dominates the cost of any one
/// iteration, which makes this the fixture most sensitive to worklist
/// optimisation.
#[test]
fn scc_16cycle_converges_within_bound() {
    let _serial = SCC_TEST_GUARD
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    // Run with the default cap, not one left over from another test.
    set_scc_fixpoint_cap_override(0);

    let fixture = fixture_path("cross_file_scc_16cycle");
    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
    validate_expectations(&findings, &fixture);

    let iters = last_scc_max_iterations();
    let within_window = matches!(iters, 16..=32);
    assert!(
        within_window,
        "Expected 16..=32 iterations for 16-cycle fixture; got {iters}. \
         Lower bound guards the chain-depth invariant. Upper bound \
         guards against summary-refinement regressions that would \
         push iteration count near the safety cap."
    );
}
/// Phase-D regression: a cap-hit must record a [`CapHitReason`] other
/// than `Unknown` whenever the convergence loop ran for at least two
/// iterations.
///
/// On the 4-cycle fixture with cap=2 the delta trajectory should shrink
/// monotonically (summaries refining toward, but not reaching, the
/// fixpoint), which the classifier reports as `MonotoneShrinking`. The
/// assertion itself is looser: a plateau or oscillation reason is also
/// accepted, since either is still a useful signal.
///
/// This test is load-bearing. `Unknown` on a real cap-hit means either
/// the trajectory is not being recorded or the classification rules
/// have regressed, and it would silently hide the actionable
/// information.
#[test]
fn scc_cap_hit_records_classified_reason() {
    use nyx_scanner::engine_notes::{CapHitReason, EngineNote};

    let _serial = SCC_TEST_GUARD
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    let fixture = fixture_path("cross_file_scc_deep_cycle");

    // cap=2 forces a cap-hit with at least two recorded iterations, which
    // gives the classifier enough samples to tell monotone-shrinking
    // apart from Unknown.
    set_scc_fixpoint_cap_override(2);
    let diags = scan_fixture_dir(&fixture, AnalysisMode::Full);
    set_scc_fixpoint_cap_override(0);

    // Pair each diag with the reason from its first
    // `CrossFileFixpointCapped` engine note; diags without evidence or
    // without such a note are skipped.
    let tagged: Vec<_> = diags
        .iter()
        .filter_map(|d| {
            let evidence = d.evidence.as_ref()?;
            let reason = evidence
                .engine_notes
                .as_slice()
                .iter()
                .find_map(|note| match note {
                    EngineNote::CrossFileFixpointCapped { reason, .. } => Some(reason.clone()),
                    _ => None,
                })?;
            Some((d, reason))
        })
        .collect();

    assert!(
        !tagged.is_empty(),
        "cap=2 scan of deep-cycle fixture must produce at least one \
         CrossFileFixpointCapped note; got diags: {:#?}",
        diags
            .iter()
            .map(|d| format!("{}:{}:{} {}", d.path, d.line, d.col, d.id))
            .collect::<Vec<_>>()
    );

    // The reason only has to be *something* other than Unknown; that is
    // the whole point of Phase-D classification. Any structured variant
    // proves the trajectory pipeline fired end-to-end.
    for (d, reason) in &tagged {
        let is_classified = !matches!(reason, CapHitReason::Unknown);
        assert!(
            is_classified,
            "cap-hit must produce a classified reason, not Unknown. \
             This means either (a) the trajectory is not being recorded \
             across iterations or (b) the classifier's rules no longer \
             cover the observed pattern. Finding: {}:{}:{} id={} \
             reason={reason:?}",
            d.path,
            d.line,
            d.col,
            d.id,
        );
    }
}