mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
Feat/configurable sanitizers and js precision (#32)
* chore: Exclude CLAUDE.md from Cargo.toml * feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators * feat: Enhance resource management and analysis efficiency - Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance. - Introduced `GlobalSummaries::merge()` for efficient merging of summaries. - Optimized file reading and hashing to eliminate redundant I/O operations. - Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing. - Enhanced taint analysis with in-place mutations to reduce memory allocations. - Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions. * feat: Implement severity downgrade for findings in non-production paths and add source kind inference * feat: Update versioning information in SECURITY.md for new stable line * feat: Update categories in Cargo.toml to include parser-implementations and text-processing * feat: Update dependencies in Cargo.lock for improved compatibility and performance * feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
parent
f96a89e7c1
commit
19b578c5c4
37 changed files with 3775 additions and 432 deletions
325
src/ast.rs
325
src/ast.rs
|
|
@ -2,6 +2,7 @@ use crate::cfg::{build_cfg, export_summaries};
|
|||
use crate::cfg_analysis;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::errors::{NyxError, NyxResult};
|
||||
use crate::labels::{build_lang_rules, severity_for_source_kind};
|
||||
use crate::patterns::Severity;
|
||||
use crate::summary::{FuncSummary, GlobalSummaries};
|
||||
use crate::symbol::{Lang, normalize_namespace};
|
||||
|
|
@ -53,6 +54,53 @@ fn is_binary(bytes: &[u8]) -> bool {
|
|||
bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1
|
||||
}
|
||||
|
||||
/// Report whether `path` points into a non-production context.
///
/// A path qualifies when either:
/// * its final component is listed in `NONPROD_FILES` or ends with `.min.js`, or
/// * any of its normal components (directories and the file name alike) is
///   exactly equal to an entry of `NONPROD_DIRS`.
///
/// Matching is case-sensitive and on whole components only. Components that
/// are not valid UTF-8 never match. Callers use the result to lower the
/// severity of findings in such files.
fn is_nonprod_path(path: &Path) -> bool {
    const NONPROD_DIRS: [&str; 12] = [
        "tests",
        "test",
        "__tests__",
        "benches",
        "benchmarks",
        "examples",
        "build",
        "scripts",
        "docs",
        "js_tests",
        "fixtures",
        "vendor",
    ];
    const NONPROD_FILES: [&str; 1] = ["build.rs"];

    // File-name based rules: well-known build scripts and minified bundles.
    let flagged_by_name = path
        .file_name()
        .and_then(|os_name| os_name.to_str())
        .is_some_and(|fname| NONPROD_FILES.contains(&fname) || fname.ends_with(".min.js"));
    if flagged_by_name {
        return true;
    }

    // Component based rules: only `Normal` components are considered, so
    // prefixes, the root, `.` and `..` can never match.
    path.components().any(|part| match part {
        std::path::Component::Normal(segment) => segment
            .to_str()
            .is_some_and(|dir| NONPROD_DIRS.contains(&dir)),
        _ => false,
    })
}
|
||||
|
||||
/// Downgrade severity by one tier: High→Medium, Medium→Low, Low→Low.
|
||||
fn downgrade_severity(s: Severity) -> Severity {
|
||||
match s {
|
||||
Severity::High => Severity::Medium,
|
||||
Severity::Medium => Severity::Low,
|
||||
Severity::Low => Severity::Low,
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Pass 1: Extract function summaries (no taint analysis)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -84,7 +132,17 @@ pub fn extract_summaries_from_bytes(
|
|||
})?;
|
||||
|
||||
let file_path_str = path.to_string_lossy();
|
||||
let (_cfg_graph, _entry, local_summaries) = build_cfg(&tree, bytes, lang_slug, &file_path_str);
|
||||
let lang_rules = build_lang_rules(_cfg, lang_slug);
|
||||
let rules_ref = if lang_rules.extra_labels.is_empty()
|
||||
&& lang_rules.terminators.is_empty()
|
||||
&& lang_rules.event_handlers.is_empty()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(&lang_rules)
|
||||
};
|
||||
let (_cfg_graph, _entry, local_summaries) =
|
||||
build_cfg(&tree, bytes, lang_slug, &file_path_str, rules_ref);
|
||||
|
||||
Ok(export_summaries(
|
||||
&local_summaries,
|
||||
|
|
@ -95,6 +153,7 @@ pub fn extract_summaries_from_bytes(
|
|||
|
||||
/// Convenience wrapper that reads the file then delegates to
|
||||
/// [`extract_summaries_from_bytes`].
|
||||
#[allow(dead_code)] // used by benchmarks and lib consumers
|
||||
pub fn extract_summaries_from_file(path: &Path, cfg: &Config) -> NyxResult<Vec<FuncSummary>> {
|
||||
let bytes = std::fs::read(path)?;
|
||||
extract_summaries_from_bytes(&bytes, path, cfg)
|
||||
|
|
@ -142,7 +201,17 @@ pub fn run_rules_on_bytes(
|
|||
|
||||
if needs_cfg {
|
||||
// Build CFG — needed for both taint analysis and CFG structural analyses.
|
||||
let (cfg_graph, entry, summaries) = build_cfg(&_tree, bytes, lang_slug, &file_path_str);
|
||||
let lang_rules = build_lang_rules(cfg, lang_slug);
|
||||
let rules_ref = if lang_rules.extra_labels.is_empty()
|
||||
&& lang_rules.terminators.is_empty()
|
||||
&& lang_rules.event_handlers.is_empty()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(&lang_rules)
|
||||
};
|
||||
let (cfg_graph, entry, summaries) =
|
||||
build_cfg(&_tree, bytes, lang_slug, &file_path_str, rules_ref);
|
||||
let caller_lang = Lang::from_slug(lang_slug).unwrap_or(Lang::Rust);
|
||||
|
||||
// ── Taint analysis ──────────────────────────────────────────────
|
||||
|
|
@ -174,7 +243,7 @@ pub fn run_rules_on_bytes(
|
|||
path: path.to_string_lossy().into_owned(),
|
||||
line: sink_point.row + 1,
|
||||
col: sink_point.column + 1,
|
||||
severity: Severity::High,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
id: format!(
|
||||
"taint-unsanitised-flow (source {}:{})",
|
||||
source_point.row + 1,
|
||||
|
|
@ -184,6 +253,7 @@ pub fn run_rules_on_bytes(
|
|||
}
|
||||
|
||||
// ── CFG structural analyses ─────────────────────────────────────
|
||||
let taint_active = global_summaries.is_some() || !taint_results.is_empty();
|
||||
let cfg_ctx = cfg_analysis::AnalysisContext {
|
||||
cfg: &cfg_graph,
|
||||
entry,
|
||||
|
|
@ -193,6 +263,8 @@ pub fn run_rules_on_bytes(
|
|||
func_summaries: &summaries,
|
||||
global_summaries,
|
||||
taint_findings: &taint_results,
|
||||
analysis_rules: rules_ref,
|
||||
taint_active,
|
||||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&_tree, cf.span.0);
|
||||
|
|
@ -238,6 +310,13 @@ pub fn run_rules_on_bytes(
|
|||
a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
|
||||
});
|
||||
|
||||
// Downgrade severity for non-production paths unless opted out
|
||||
if !cfg.scanner.include_nonprod && is_nonprod_path(path) {
|
||||
for d in &mut out {
|
||||
d.severity = downgrade_severity(d.severity);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
|
|
@ -253,6 +332,184 @@ pub fn run_rules_on_file(
|
|||
run_rules_on_bytes(&bytes, path, cfg, global_summaries, scan_root)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Fused single-pass: extract summaries + run full analysis in one parse/CFG
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Result of a fused analysis pass: both function summaries and diagnostics.
|
||||
pub struct FusedResult {
|
||||
pub summaries: Vec<FuncSummary>,
|
||||
pub diags: Vec<Diag>,
|
||||
}
|
||||
|
||||
/// Parse the file once, build the CFG once, and produce both function
|
||||
/// summaries (for cross-file resolution) and full diagnostics (AST queries +
|
||||
/// taint + CFG structural analyses).
|
||||
///
|
||||
/// When `global_summaries` is `None`, the taint engine runs with local
|
||||
/// context only (equivalent to pass 1 + partial pass 2). A second call
|
||||
/// to [`run_taint_only`] can refine findings with the full cross-file view
|
||||
/// without re-parsing or re-building the CFG.
|
||||
pub fn analyse_file_fused(
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
cfg: &Config,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
scan_root: Option<&Path>,
|
||||
) -> NyxResult<FusedResult> {
|
||||
let _span = tracing::debug_span!("analyse_fused", file = %path.display()).entered();
|
||||
|
||||
if is_binary(bytes) {
|
||||
return Ok(FusedResult {
|
||||
summaries: vec![],
|
||||
diags: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let Some((ts_lang, lang_slug)) = lang_for_path(path) else {
|
||||
return Ok(FusedResult {
|
||||
summaries: vec![],
|
||||
diags: vec![],
|
||||
});
|
||||
};
|
||||
|
||||
let tree = PARSER.with(|cell| {
|
||||
let mut parser = cell.borrow_mut();
|
||||
parser.set_language(&ts_lang)?;
|
||||
parser
|
||||
.parse(bytes, None)
|
||||
.ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
|
||||
})?;
|
||||
|
||||
let file_path_str = path.to_string_lossy();
|
||||
|
||||
// Build language-specific analysis rules once
|
||||
let lang_rules = build_lang_rules(cfg, lang_slug);
|
||||
let rules_ref = if lang_rules.extra_labels.is_empty()
|
||||
&& lang_rules.terminators.is_empty()
|
||||
&& lang_rules.event_handlers.is_empty()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(&lang_rules)
|
||||
};
|
||||
|
||||
// Build CFG once — used for both summary extraction AND analysis
|
||||
let (cfg_graph, entry, local_summaries) =
|
||||
build_cfg(&tree, bytes, lang_slug, &file_path_str, rules_ref);
|
||||
|
||||
// Export summaries (always — needed for cross-file merging)
|
||||
let summaries = export_summaries(&local_summaries, &file_path_str, lang_slug);
|
||||
|
||||
let mut out = Vec::new();
|
||||
|
||||
// Taint + CFG structural analyses
|
||||
let needs_cfg =
|
||||
cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Taint;
|
||||
|
||||
if needs_cfg {
|
||||
let caller_lang = Lang::from_slug(lang_slug).unwrap_or(Lang::Rust);
|
||||
let scan_root_str = scan_root.map(|p| p.to_string_lossy());
|
||||
let namespace = normalize_namespace(&file_path_str, scan_root_str.as_deref());
|
||||
|
||||
let taint_results = analyse_file(
|
||||
&cfg_graph,
|
||||
entry,
|
||||
&local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
&namespace,
|
||||
&[],
|
||||
);
|
||||
for finding in &taint_results {
|
||||
let sink_byte = cfg_graph[finding.sink].span.0;
|
||||
let sink_point = byte_offset_to_point(&tree, sink_byte);
|
||||
let source_byte = cfg_graph[finding.source].span.0;
|
||||
let source_point = byte_offset_to_point(&tree, source_byte);
|
||||
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: sink_point.row + 1,
|
||||
col: sink_point.column + 1,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
id: format!(
|
||||
"taint-unsanitised-flow (source {}:{})",
|
||||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
let taint_active = global_summaries.is_some() || !taint_results.is_empty();
|
||||
let cfg_ctx = cfg_analysis::AnalysisContext {
|
||||
cfg: &cfg_graph,
|
||||
entry,
|
||||
lang: caller_lang,
|
||||
file_path: &file_path_str,
|
||||
source_bytes: bytes,
|
||||
func_summaries: &local_summaries,
|
||||
global_summaries,
|
||||
taint_findings: &taint_results,
|
||||
analysis_rules: rules_ref,
|
||||
taint_active,
|
||||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&tree, cf.span.0);
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cf.severity,
|
||||
id: cf.rule_id,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// AST pattern queries
|
||||
if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
|
||||
let root = tree.root_node();
|
||||
let compiled = query_cache::for_lang(lang_slug, ts_lang);
|
||||
let mut cursor = QueryCursor::new();
|
||||
|
||||
for cq in compiled.iter() {
|
||||
if cfg.scanner.min_severity <= cq.meta.severity {
|
||||
continue;
|
||||
}
|
||||
let mut matches = cursor.matches(&cq.query, root, bytes);
|
||||
while let Some(m) = matches.next() {
|
||||
if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
|
||||
let point = cap.node.start_position();
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cq.meta.severity,
|
||||
id: cq.meta.id.to_owned(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dedup
|
||||
out.sort_by(|a, b| (a.line, a.col, &a.id, a.severity).cmp(&(b.line, b.col, &b.id, b.severity)));
|
||||
out.dedup_by(|a, b| {
|
||||
a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
|
||||
});
|
||||
|
||||
// Downgrade severity for non-production paths unless opted out
|
||||
if !cfg.scanner.include_nonprod && is_nonprod_path(path) {
|
||||
for d in &mut out {
|
||||
d.severity = downgrade_severity(d.severity);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FusedResult {
|
||||
summaries,
|
||||
diags: out,
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unknown_extension_returns_empty() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -279,3 +536,65 @@ fn binary_file_guard_triggers() {
|
|||
let diags = run_rules_on_file(&bin, &Config::default(), None, None).unwrap();
|
||||
assert!(diags.is_empty(), "binary files are skipped");
|
||||
}
|
||||
|
||||
#[test]
fn nonprod_path_detection() {
    // Paths that must be recognised as non-production.
    let expected_nonprod = [
        "project/tests/test_main.py",
        "src/__tests__/foo.js",
        "benches/bench.rs",
        "vendor/lib/foo.py",
        "src/build.rs",
        "dist/app.min.js",
        "examples/demo.py",
        "fixtures/data.json",
    ];
    for candidate in expected_nonprod {
        assert!(is_nonprod_path(Path::new(candidate)));
    }

    // Ordinary production paths must not be flagged.
    let expected_prod = ["src/main.rs", "lib/handler.py", "app/views.py"];
    for candidate in expected_prod {
        assert!(!is_nonprod_path(Path::new(candidate)));
    }
}
|
||||
|
||||
#[test]
fn severity_downgrade_works() {
    // (input, expected result after one downgrade step)
    let expectations = [
        (Severity::High, Severity::Medium),
        (Severity::Medium, Severity::Low),
        (Severity::Low, Severity::Low),
    ];
    for (before, after) in expectations {
        assert_eq!(downgrade_severity(before), after);
    }
}
|
||||
|
||||
#[test]
fn nonprod_path_downgrades_findings() {
    let dir = tempfile::tempdir().unwrap();
    // Create a file under a "tests" directory
    let test_dir = dir.path().join("tests");
    std::fs::create_dir_all(&test_dir).unwrap();
    let test_file = test_dir.join("test_cmd.py");
    std::fs::write(
        &test_file,
        b"import os\ndef test():\n cmd = os.environ['X']\n os.system(cmd)\n",
    )
    .unwrap();

    let default_cfg = Config::default();
    let diags = run_rules_on_file(&test_file, &default_cfg, None, None).unwrap();

    // All findings in tests/ should be downgraded (no HIGH)
    let high: Vec<_> = diags
        .iter()
        .filter(|d| d.severity == Severity::High)
        .collect();
    assert!(
        high.is_empty(),
        "Findings in tests/ should be downgraded from HIGH; got {:?}",
        high
    );

    // With include_nonprod=true, original severity preserved
    let mut prod_cfg = Config::default();
    prod_cfg.scanner.include_nonprod = true;
    let diags_prod = run_rules_on_file(&test_file, &prod_cfg, None, None).unwrap();

    // The opt-out must only affect severities, never which findings exist.
    assert_eq!(
        diags.len(),
        diags_prod.len(),
        "include_nonprod must not add or remove findings"
    );

    // Downgrading happens after sorting/dedup and does not reorder, so the
    // two runs line up index by index: each default-run severity must be the
    // one-tier downgrade of the severity reported with include_nonprod=true.
    for (downgraded, original) in diags.iter().zip(diags_prod.iter()) {
        assert_eq!(
            downgraded.severity,
            downgrade_severity(original.severity),
            "severity with include_nonprod=true should be the un-downgraded one; got {:?} vs {:?}",
            downgraded,
            original
        );
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue