mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
254 lines
10 KiB
Rust
254 lines
10 KiB
Rust
//! Regression guard: edit-and-rescan parity for anonymous functions.
|
|
//!
|
|
//! The scanner identifies anonymous / closure / lambda bodies by
|
|
//! `FuncKey.disambig`. An earlier implementation keyed that field on
|
|
//! the function node's `start_byte`, so inserting a line *above* an
|
|
//! unchanged anonymous function shifted its identity and invalidated
|
|
//! persisted callback bindings and SSA summaries that referenced it ,
|
|
//! producing different diagnostics for semantically identical code.
|
|
//!
|
|
//! The disambig is now a depth-first preorder index over the file's
|
|
//! function nodes. That index is stable against edits that do not add
|
|
//! or remove functions, so these tests assert:
|
|
//!
|
|
//! * A local edit above an anonymous function leaves the finding set
|
|
//! unchanged modulo line-number shifts.
|
|
//! * A cross-file callback flow where one file gets a comment added
|
|
//! above the exported anonymous function still resolves end-to-end on
|
|
//! the rescan.
|
|
//!
|
|
//! Both tests share the *same* on-disk SQLite index between scans to
|
|
//! exercise the stable-disambig invariant against real cached
|
|
//! `FuncKey`s.
|
|
|
|
#[allow(dead_code)]
|
|
mod common;
|
|
|
|
use common::test_config;
|
|
use nyx_scanner::commands::index::build_index;
|
|
use nyx_scanner::commands::scan::{Diag, scan_with_index_parallel};
|
|
use nyx_scanner::database::index::Indexer;
|
|
use nyx_scanner::utils::config::AnalysisMode;
|
|
use std::collections::BTreeSet;
|
|
use std::fs;
|
|
use std::path::Path;
|
|
use std::sync::Arc;
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Fingerprint that ignores line shifts of a known delta.
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/// Stable per-finding key across a known `line_delta` shift. Includes
|
|
/// enough structural fields to distinguish genuine divergence from a
|
|
/// pure line-offset reshuffle.
|
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
|
struct ShiftedKey {
|
|
rel_path: String,
|
|
adjusted_line: i64,
|
|
col: usize,
|
|
rule_id: String,
|
|
severity: String,
|
|
path_validated: bool,
|
|
}
|
|
|
|
/// Normalize a rule id so the embedded `(source N:M)` suffix on taint
|
|
/// findings, which names the *source* line, is shifted by `line_delta`
|
|
/// instead of compared literally.
|
|
fn normalize_rule_id(id: &str, line_delta: i64) -> String {
|
|
let Some(open) = id.find("(source ") else {
|
|
return id.to_string();
|
|
};
|
|
let Some(close_rel) = id[open..].find(')') else {
|
|
return id.to_string();
|
|
};
|
|
let body = &id[open + "(source ".len()..open + close_rel];
|
|
let Some((l, c)) = body.split_once(':') else {
|
|
return id.to_string();
|
|
};
|
|
let (Ok(line), Ok(col)) = (l.parse::<i64>(), c.parse::<i64>()) else {
|
|
return id.to_string();
|
|
};
|
|
let rest = &id[open + close_rel + 1..];
|
|
format!(
|
|
"{}(source {}:{}){}",
|
|
&id[..open],
|
|
line - line_delta,
|
|
col,
|
|
rest,
|
|
)
|
|
}
|
|
|
|
fn shifted_key(d: &Diag, fixture_root: &Path, line_delta: i64) -> ShiftedKey {
|
|
let rel = Path::new(&d.path)
|
|
.strip_prefix(fixture_root)
|
|
.map(|p| p.to_string_lossy().into_owned())
|
|
.unwrap_or_else(|_| d.path.clone())
|
|
.replace('\\', "/");
|
|
ShiftedKey {
|
|
rel_path: rel,
|
|
adjusted_line: d.line as i64 - line_delta,
|
|
col: d.col,
|
|
rule_id: normalize_rule_id(&d.id, line_delta),
|
|
severity: d.severity.as_db_str().to_string(),
|
|
path_validated: d.path_validated,
|
|
}
|
|
}
|
|
|
|
fn shifted_set(diags: &[Diag], fixture_root: &Path, line_delta: i64) -> BTreeSet<ShiftedKey> {
|
|
diags
|
|
.iter()
|
|
.map(|d| shifted_key(d, fixture_root, line_delta))
|
|
.collect()
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Scan helper: reuse the same DB pool across scans.
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
fn indexed_scan(
|
|
project: &str,
|
|
pool: &Arc<r2d2::Pool<r2d2_sqlite::SqliteConnectionManager>>,
|
|
root: &Path,
|
|
mode: AnalysisMode,
|
|
) -> Vec<Diag> {
|
|
let cfg = test_config(mode);
|
|
scan_with_index_parallel(project, Arc::clone(pool), &cfg, false, root).expect("indexed scan")
|
|
}
|
|
|
|
fn cold_build_index(project: &str, root: &Path, db_path: &Path, mode: AnalysisMode) {
|
|
let cfg = test_config(mode);
|
|
build_index(project, root, db_path, &cfg, false).expect("build_index");
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Test 1, local edit above a nested anonymous function.
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
const LOCAL_FIXTURE_BEFORE: &str = "\
|
|
function apply(fn, data) { return fn(data); }
|
|
const cmd = process.env.USER_CMD;
|
|
apply(function (x) { require('child_process').exec(x); }, cmd);
|
|
";
|
|
|
|
/// Insert a single blank line at the top of the file.
|
|
fn prepend_blank(s: &str) -> String {
|
|
format!("\n{s}")
|
|
}
|
|
|
|
#[test]
|
|
fn anon_fn_finding_stable_across_blank_line_prepend() {
|
|
let td = tempfile::tempdir().expect("tempdir");
|
|
let root = td.path().to_path_buf();
|
|
let js_path = root.join("handler.js");
|
|
fs::write(&js_path, LOCAL_FIXTURE_BEFORE).expect("write fixture");
|
|
|
|
let db_path = root.join("incremental.sqlite");
|
|
cold_build_index("incr-local", &root, &db_path, AnalysisMode::Full);
|
|
let pool = Indexer::init(&db_path).expect("init pool");
|
|
|
|
// Step 1: baseline scan.
|
|
let before_diags = indexed_scan("incr-local", &pool, &root, AnalysisMode::Full);
|
|
let before = shifted_set(&before_diags, &root, 0);
|
|
|
|
// Must actually exercise an anonymous-function flow, otherwise the
|
|
// test is vacuously green.
|
|
assert!(
|
|
!before.is_empty(),
|
|
"baseline must emit at least one finding to exercise anon-fn identity; got none.\n\
|
|
Raw diags: {:#?}",
|
|
before_diags
|
|
);
|
|
|
|
// Step 2: prepend a blank line (shifts every byte offset but changes
|
|
// no semantics). Re-scan using the *same* on-disk index so cross-
|
|
// scan state (summaries, callback bindings) is exercised.
|
|
fs::write(&js_path, prepend_blank(LOCAL_FIXTURE_BEFORE)).expect("rewrite fixture");
|
|
let after_diags = indexed_scan("incr-local", &pool, &root, AnalysisMode::Full);
|
|
let after = shifted_set(&after_diags, &root, 1);
|
|
|
|
assert_eq!(
|
|
before, after,
|
|
"edit-and-rescan parity broken: findings diverge after unrelated blank-line prepend.\n\
|
|
Before (line delta 0): {:#?}\n\
|
|
After (line delta 1): {:#?}\n\
|
|
Raw before: {:#?}\n\
|
|
Raw after: {:#?}",
|
|
before, after, before_diags, after_diags
|
|
);
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Test 2, cross-file callback resolution after comment-line insert.
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
const CROSS_FILE_A_BEFORE: &str = "\
|
|
module.exports = function (cmd) {
|
|
require('child_process').exec(cmd);
|
|
};
|
|
";
|
|
|
|
const CROSS_FILE_A_AFTER: &str = "\
|
|
// unrelated comment added above the exported callback
|
|
module.exports = function (cmd) {
|
|
require('child_process').exec(cmd);
|
|
};
|
|
";
|
|
|
|
const CROSS_FILE_B: &str = "\
|
|
const run = require('./a');
|
|
run(process.env.INPUT);
|
|
";
|
|
|
|
#[test]
|
|
fn cross_file_anon_callback_stable_across_comment_insert() {
|
|
let td = tempfile::tempdir().expect("tempdir");
|
|
let root = td.path().to_path_buf();
|
|
let a = root.join("a.js");
|
|
let b = root.join("b.js");
|
|
fs::write(&a, CROSS_FILE_A_BEFORE).expect("write a.js");
|
|
fs::write(&b, CROSS_FILE_B).expect("write b.js");
|
|
|
|
let db_path = root.join("incremental.sqlite");
|
|
cold_build_index("incr-cross", &root, &db_path, AnalysisMode::Full);
|
|
let pool = Indexer::init(&db_path).expect("init pool");
|
|
|
|
let before_diags = indexed_scan("incr-cross", &pool, &root, AnalysisMode::Full);
|
|
|
|
// The cross-file flow should resolve: b.js → a.js (exported anon
|
|
// function) → child_process.exec. Count taint findings in either
|
|
// file as the structural invariant, rank/line may legitimately
|
|
// differ between scans, but the *presence* of a taint finding on
|
|
// the sink side must not regress.
|
|
fn taint_count_in(diags: &[Diag], rel: &str) -> usize {
|
|
diags
|
|
.iter()
|
|
.filter(|d| {
|
|
d.id.starts_with("taint-unsanitised-flow")
|
|
&& d.path.replace('\\', "/").ends_with(rel)
|
|
})
|
|
.count()
|
|
}
|
|
|
|
let before_count =
|
|
taint_count_in(&before_diags, "a.js") + taint_count_in(&before_diags, "b.js");
|
|
assert!(
|
|
before_count > 0,
|
|
"baseline must emit a cross-file taint finding through the exported anon callback; \
|
|
got none. Raw diags: {:#?}",
|
|
before_diags
|
|
);
|
|
|
|
// Edit a.js: prepend a comment line above the exported anon fn.
|
|
fs::write(&a, CROSS_FILE_A_AFTER).expect("rewrite a.js");
|
|
let after_diags = indexed_scan("incr-cross", &pool, &root, AnalysisMode::Full);
|
|
let after_count = taint_count_in(&after_diags, "a.js") + taint_count_in(&after_diags, "b.js");
|
|
|
|
assert_eq!(
|
|
before_count, after_count,
|
|
"cross-file anon callback resolution regressed after unrelated comment insert \
|
|
(before={before_count}, after={after_count}).\n\
|
|
Raw before: {:#?}\n\
|
|
Raw after: {:#?}",
|
|
before_diags, after_diags
|
|
);
|
|
}
|