mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
129 lines
4.6 KiB
Rust
129 lines
4.6 KiB
Rust
//! Regression guard: per-thread-count determinism.
|
|
//!
|
|
//! The scanner's two-pass pipeline runs rayon `par_iter` over files in
|
|
//! both pass-1 (summary extraction) and pass-2 (rule evaluation), and
|
|
//! merges summaries via `try_reduce`. A latent ordering bug, a
|
|
//! shared mutable state hit unprotected from multiple threads, or a
|
|
//! `HashMap` iteration order leaking into a finding identity, can
|
|
//! surface as a diagnostic that appears with 4 workers but not with 1.
|
|
//!
|
|
//! This test runs the same fixture under worker-thread counts of 1,
|
|
//! 2, 4, and 8, then asserts the normalised finding set matches the
|
|
//! single-threaded baseline. The normalisation strips volatile bits
|
|
//! (rank_score ordering ties, suppression book-keeping, etc.) so the
|
|
//! assertion fires only on real output divergence.
|
|
//!
|
|
//! If this test ever flakes, prefer investigating the engine over
|
|
//! weakening the normaliser; engine-level determinism across thread
|
|
//! counts is load-bearing for reproducible CI runs.
|
|
mod common;
|
|
|
|
use common::test_config;
|
|
use nyx_scanner::commands::scan::Diag;
|
|
use nyx_scanner::scan_no_index;
|
|
use nyx_scanner::utils::config::AnalysisMode;
|
|
use rayon::ThreadPoolBuilder;
|
|
use std::path::{Path, PathBuf};
|
|
|
|
fn fixture_path(name: &str) -> PathBuf {
|
|
Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
.join("tests")
|
|
.join("fixtures")
|
|
.join(name)
|
|
}
|
|
|
|
/// Normalised identity of a single finding, used to compare scan output
/// across worker-thread counts.
///
/// Only structural fields that should be deterministic regardless of the
/// thread count are kept. Volatile bookkeeping (rank_score float ties,
/// suppression metadata with pointer-derived content) is deliberately
/// left out.
///
/// The derived `Ord` compares fields in declaration order, so the field
/// order below defines the sort order of a list of keys.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct FindingKey {
    /// Path of the file the finding was reported in.
    path: String,
    /// Line of the finding.
    line: usize,
    /// Column of the finding.
    col: usize,
    /// Identifier of the rule that produced the finding.
    rule_id: String,
    /// Severity rendered as a string.
    severity: String,
    /// The diagnostic's `path_validated` flag.
    path_validated: bool,
    /// Primary identifier of the finding.
    finding_id: String,
    /// Alternative identifiers for the same finding.
    alternative_finding_ids: Vec<String>,
}
|
|
|
|
fn project(diags: &[Diag]) -> Vec<FindingKey> {
|
|
let mut keys: Vec<FindingKey> = diags
|
|
.iter()
|
|
.map(|d| {
|
|
let mut alts = d.alternative_finding_ids.clone();
|
|
alts.sort();
|
|
FindingKey {
|
|
path: d.path.clone(),
|
|
line: d.line,
|
|
col: d.col,
|
|
rule_id: d.id.clone(),
|
|
severity: d.severity.as_db_str().to_string(),
|
|
path_validated: d.path_validated,
|
|
finding_id: d.finding_id.clone(),
|
|
alternative_finding_ids: alts,
|
|
}
|
|
})
|
|
.collect();
|
|
keys.sort();
|
|
keys
|
|
}
|
|
|
|
/// Run a scan pinned to `threads` worker threads for both the file
|
|
/// walker and the rayon pass-1/2 parallel iterators.
|
|
fn run_scan_with_threads(fixture: &Path, threads: usize) -> Vec<Diag> {
|
|
let mut cfg = test_config(AnalysisMode::Full);
|
|
cfg.performance.worker_threads = Some(threads);
|
|
|
|
let pool = ThreadPoolBuilder::new()
|
|
.num_threads(threads)
|
|
.build()
|
|
.expect("build rayon thread pool");
|
|
|
|
pool.install(|| scan_no_index(fixture, &cfg).expect("scan_no_index should succeed"))
|
|
}
|
|
|
|
/// Scans one fixture with 1, 2, 4 and 8 worker threads and requires every
/// multi-threaded run to project to the same finding set as the 1-thread
/// run.
#[test]
fn scan_is_deterministic_across_thread_counts() {
    // A small cross-file fixture is sufficient to exercise the merge paths
    // that most often flake under thread contention: `cross_file_js_sqli`
    // has both pass-1 summaries and a cross-file taint finding.
    let fixture = fixture_path("cross_file_js_sqli");

    // Run every thread count up front, in ascending order; the first entry
    // (1 thread) is the baseline.
    let runs: Vec<(usize, Vec<Diag>)> = [1usize, 2, 4, 8]
        .iter()
        .map(|&threads| (threads, run_scan_with_threads(&fixture, threads)))
        .collect();

    let baseline = project(&runs[0].1);
    assert!(
        !baseline.is_empty(),
        "baseline produced no findings — the determinism test relies on \
         a non-empty finding set to be meaningful. Check the fixture \
         still trips the engine."
    );

    // Compare each multi-threaded run against the baseline.
    for (threads, diags) in runs.iter().skip(1) {
        let candidate = project(diags);
        assert_eq!(
            candidate,
            baseline,
            "worker_threads={} produced a different normalised finding \
             set than the 1-thread baseline. This indicates a \
             nondeterministic path in scan_filesystem — most likely a \
             shared mutable state accessed without synchronisation, or \
             a HashMap iteration order leaking into a finding \
             identity.\n\n\
             baseline ({} findings): {:#?}\n\n\
             candidate ({} findings): {:#?}",
            threads,
            baseline.len(),
            baseline,
            candidate.len(),
            candidate,
        );
    }
}
|