mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
Feat/configurable sanitizers and js precision (#32)
* chore: Exclude CLAUDE.md from Cargo.toml * feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators * feat: Enhance resource management and analysis efficiency - Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance. - Introduced `GlobalSummaries::merge()` for efficient merging of summaries. - Optimized file reading and hashing to eliminate redundant I/O operations. - Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing. - Enhanced taint analysis with in-place mutations to reduce memory allocations. - Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions. * feat: Implement severity downgrade for findings in non-production paths and add source kind inference * feat: Update versioning information in SECURITY.md for new stable line * feat: Update categories in Cargo.toml to include parser-implementations and text-processing * feat: Update dependencies in Cargo.lock for improved compatibility and performance * feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
parent
f96a89e7c1
commit
19b578c5c4
37 changed files with 3775 additions and 432 deletions
325
src/ast.rs
325
src/ast.rs
|
|
@ -2,6 +2,7 @@ use crate::cfg::{build_cfg, export_summaries};
|
|||
use crate::cfg_analysis;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::errors::{NyxError, NyxResult};
|
||||
use crate::labels::{build_lang_rules, severity_for_source_kind};
|
||||
use crate::patterns::Severity;
|
||||
use crate::summary::{FuncSummary, GlobalSummaries};
|
||||
use crate::symbol::{Lang, normalize_namespace};
|
||||
|
|
@ -53,6 +54,53 @@ fn is_binary(bytes: &[u8]) -> bool {
|
|||
bytes.iter().filter(|b| **b == 0).count() * 100 / bytes.len().max(1) > 1
|
||||
}
|
||||
|
||||
/// Report whether `path` points at a file in a non-production context
/// (tests, vendored code, benchmarks, examples, build scripts, ...).
///
/// Callers use the answer to downgrade the severity of findings in files that
/// are unlikely to be part of the attack surface.
///
/// A path is considered non-production when either
/// * its file name is `build.rs` or ends with `.min.js`, or
/// * one of the *directory* components leading to the file is listed in
///   `NONPROD_DIRS`.
///
/// Only the directory part of the path is compared against `NONPROD_DIRS`, so
/// a file that merely happens to be named `build` or `docs` is not
/// misclassified as living in such a directory. Matching is exact and
/// case-sensitive; components that are not valid UTF-8 never match.
fn is_nonprod_path(path: &Path) -> bool {
    static NONPROD_DIRS: &[&str] = &[
        "tests",
        "test",
        "__tests__",
        "benches",
        "benchmarks",
        "examples",
        "build",
        "scripts",
        "docs",
        "js_tests",
        "fixtures",
        "vendor",
    ];
    static NONPROD_FILES: &[&str] = &["build.rs"];

    // 1) File-name based rules (build scripts, minified bundles).
    let file_is_nonprod = path
        .file_name()
        .and_then(|n| n.to_str())
        .is_some_and(|name| NONPROD_FILES.contains(&name) || name.ends_with(".min.js"));
    if file_is_nonprod {
        return true;
    }

    // 2) Directory based rules: inspect every named component of the parent
    //    directory; the file name itself is deliberately excluded.
    path.parent().is_some_and(|dir| {
        dir.components().any(|component| match component {
            std::path::Component::Normal(c) => {
                c.to_str().is_some_and(|s| NONPROD_DIRS.contains(&s))
            }
            _ => false,
        })
    })
}
|
||||
|
||||
/// Downgrade severity by one tier: High→Medium, Medium→Low, Low→Low.
|
||||
fn downgrade_severity(s: Severity) -> Severity {
|
||||
match s {
|
||||
Severity::High => Severity::Medium,
|
||||
Severity::Medium => Severity::Low,
|
||||
Severity::Low => Severity::Low,
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Pass 1: Extract function summaries (no taint analysis)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -84,7 +132,17 @@ pub fn extract_summaries_from_bytes(
|
|||
})?;
|
||||
|
||||
let file_path_str = path.to_string_lossy();
|
||||
let (_cfg_graph, _entry, local_summaries) = build_cfg(&tree, bytes, lang_slug, &file_path_str);
|
||||
let lang_rules = build_lang_rules(_cfg, lang_slug);
|
||||
let rules_ref = if lang_rules.extra_labels.is_empty()
|
||||
&& lang_rules.terminators.is_empty()
|
||||
&& lang_rules.event_handlers.is_empty()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(&lang_rules)
|
||||
};
|
||||
let (_cfg_graph, _entry, local_summaries) =
|
||||
build_cfg(&tree, bytes, lang_slug, &file_path_str, rules_ref);
|
||||
|
||||
Ok(export_summaries(
|
||||
&local_summaries,
|
||||
|
|
@ -95,6 +153,7 @@ pub fn extract_summaries_from_bytes(
|
|||
|
||||
/// Convenience wrapper that reads the file then delegates to
|
||||
/// [`extract_summaries_from_bytes`].
|
||||
#[allow(dead_code)] // used by benchmarks and lib consumers
|
||||
pub fn extract_summaries_from_file(path: &Path, cfg: &Config) -> NyxResult<Vec<FuncSummary>> {
|
||||
let bytes = std::fs::read(path)?;
|
||||
extract_summaries_from_bytes(&bytes, path, cfg)
|
||||
|
|
@ -142,7 +201,17 @@ pub fn run_rules_on_bytes(
|
|||
|
||||
if needs_cfg {
|
||||
// Build CFG — needed for both taint analysis and CFG structural analyses.
|
||||
let (cfg_graph, entry, summaries) = build_cfg(&_tree, bytes, lang_slug, &file_path_str);
|
||||
let lang_rules = build_lang_rules(cfg, lang_slug);
|
||||
let rules_ref = if lang_rules.extra_labels.is_empty()
|
||||
&& lang_rules.terminators.is_empty()
|
||||
&& lang_rules.event_handlers.is_empty()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(&lang_rules)
|
||||
};
|
||||
let (cfg_graph, entry, summaries) =
|
||||
build_cfg(&_tree, bytes, lang_slug, &file_path_str, rules_ref);
|
||||
let caller_lang = Lang::from_slug(lang_slug).unwrap_or(Lang::Rust);
|
||||
|
||||
// ── Taint analysis ──────────────────────────────────────────────
|
||||
|
|
@ -174,7 +243,7 @@ pub fn run_rules_on_bytes(
|
|||
path: path.to_string_lossy().into_owned(),
|
||||
line: sink_point.row + 1,
|
||||
col: sink_point.column + 1,
|
||||
severity: Severity::High,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
id: format!(
|
||||
"taint-unsanitised-flow (source {}:{})",
|
||||
source_point.row + 1,
|
||||
|
|
@ -184,6 +253,7 @@ pub fn run_rules_on_bytes(
|
|||
}
|
||||
|
||||
// ── CFG structural analyses ─────────────────────────────────────
|
||||
let taint_active = global_summaries.is_some() || !taint_results.is_empty();
|
||||
let cfg_ctx = cfg_analysis::AnalysisContext {
|
||||
cfg: &cfg_graph,
|
||||
entry,
|
||||
|
|
@ -193,6 +263,8 @@ pub fn run_rules_on_bytes(
|
|||
func_summaries: &summaries,
|
||||
global_summaries,
|
||||
taint_findings: &taint_results,
|
||||
analysis_rules: rules_ref,
|
||||
taint_active,
|
||||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&_tree, cf.span.0);
|
||||
|
|
@ -238,6 +310,13 @@ pub fn run_rules_on_bytes(
|
|||
a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
|
||||
});
|
||||
|
||||
// Downgrade severity for non-production paths unless opted out
|
||||
if !cfg.scanner.include_nonprod && is_nonprod_path(path) {
|
||||
for d in &mut out {
|
||||
d.severity = downgrade_severity(d.severity);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
|
|
@ -253,6 +332,184 @@ pub fn run_rules_on_file(
|
|||
run_rules_on_bytes(&bytes, path, cfg, global_summaries, scan_root)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Fused single-pass: extract summaries + run full analysis in one parse/CFG
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Result of a fused analysis pass: both function summaries and diagnostics.
|
||||
pub struct FusedResult {
|
||||
pub summaries: Vec<FuncSummary>,
|
||||
pub diags: Vec<Diag>,
|
||||
}
|
||||
|
||||
/// Parse the file once, build the CFG once, and produce both function
|
||||
/// summaries (for cross-file resolution) and full diagnostics (AST queries +
|
||||
/// taint + CFG structural analyses).
|
||||
///
|
||||
/// When `global_summaries` is `None`, the taint engine runs with local
|
||||
/// context only (equivalent to pass 1 + partial pass 2). A second call
|
||||
/// to [`run_taint_only`] can refine findings with the full cross-file view
|
||||
/// without re-parsing or re-building the CFG.
|
||||
pub fn analyse_file_fused(
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
cfg: &Config,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
scan_root: Option<&Path>,
|
||||
) -> NyxResult<FusedResult> {
|
||||
let _span = tracing::debug_span!("analyse_fused", file = %path.display()).entered();
|
||||
|
||||
if is_binary(bytes) {
|
||||
return Ok(FusedResult {
|
||||
summaries: vec![],
|
||||
diags: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let Some((ts_lang, lang_slug)) = lang_for_path(path) else {
|
||||
return Ok(FusedResult {
|
||||
summaries: vec![],
|
||||
diags: vec![],
|
||||
});
|
||||
};
|
||||
|
||||
let tree = PARSER.with(|cell| {
|
||||
let mut parser = cell.borrow_mut();
|
||||
parser.set_language(&ts_lang)?;
|
||||
parser
|
||||
.parse(bytes, None)
|
||||
.ok_or_else(|| NyxError::Other("tree-sitter failed".into()))
|
||||
})?;
|
||||
|
||||
let file_path_str = path.to_string_lossy();
|
||||
|
||||
// Build language-specific analysis rules once
|
||||
let lang_rules = build_lang_rules(cfg, lang_slug);
|
||||
let rules_ref = if lang_rules.extra_labels.is_empty()
|
||||
&& lang_rules.terminators.is_empty()
|
||||
&& lang_rules.event_handlers.is_empty()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(&lang_rules)
|
||||
};
|
||||
|
||||
// Build CFG once — used for both summary extraction AND analysis
|
||||
let (cfg_graph, entry, local_summaries) =
|
||||
build_cfg(&tree, bytes, lang_slug, &file_path_str, rules_ref);
|
||||
|
||||
// Export summaries (always — needed for cross-file merging)
|
||||
let summaries = export_summaries(&local_summaries, &file_path_str, lang_slug);
|
||||
|
||||
let mut out = Vec::new();
|
||||
|
||||
// Taint + CFG structural analyses
|
||||
let needs_cfg =
|
||||
cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Taint;
|
||||
|
||||
if needs_cfg {
|
||||
let caller_lang = Lang::from_slug(lang_slug).unwrap_or(Lang::Rust);
|
||||
let scan_root_str = scan_root.map(|p| p.to_string_lossy());
|
||||
let namespace = normalize_namespace(&file_path_str, scan_root_str.as_deref());
|
||||
|
||||
let taint_results = analyse_file(
|
||||
&cfg_graph,
|
||||
entry,
|
||||
&local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
&namespace,
|
||||
&[],
|
||||
);
|
||||
for finding in &taint_results {
|
||||
let sink_byte = cfg_graph[finding.sink].span.0;
|
||||
let sink_point = byte_offset_to_point(&tree, sink_byte);
|
||||
let source_byte = cfg_graph[finding.source].span.0;
|
||||
let source_point = byte_offset_to_point(&tree, source_byte);
|
||||
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: sink_point.row + 1,
|
||||
col: sink_point.column + 1,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
id: format!(
|
||||
"taint-unsanitised-flow (source {}:{})",
|
||||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
let taint_active = global_summaries.is_some() || !taint_results.is_empty();
|
||||
let cfg_ctx = cfg_analysis::AnalysisContext {
|
||||
cfg: &cfg_graph,
|
||||
entry,
|
||||
lang: caller_lang,
|
||||
file_path: &file_path_str,
|
||||
source_bytes: bytes,
|
||||
func_summaries: &local_summaries,
|
||||
global_summaries,
|
||||
taint_findings: &taint_results,
|
||||
analysis_rules: rules_ref,
|
||||
taint_active,
|
||||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&tree, cf.span.0);
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cf.severity,
|
||||
id: cf.rule_id,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// AST pattern queries
|
||||
if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
|
||||
let root = tree.root_node();
|
||||
let compiled = query_cache::for_lang(lang_slug, ts_lang);
|
||||
let mut cursor = QueryCursor::new();
|
||||
|
||||
for cq in compiled.iter() {
|
||||
if cfg.scanner.min_severity <= cq.meta.severity {
|
||||
continue;
|
||||
}
|
||||
let mut matches = cursor.matches(&cq.query, root, bytes);
|
||||
while let Some(m) = matches.next() {
|
||||
if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
|
||||
let point = cap.node.start_position();
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cq.meta.severity,
|
||||
id: cq.meta.id.to_owned(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dedup
|
||||
out.sort_by(|a, b| (a.line, a.col, &a.id, a.severity).cmp(&(b.line, b.col, &b.id, b.severity)));
|
||||
out.dedup_by(|a, b| {
|
||||
a.line == b.line && a.col == b.col && a.id == b.id && a.severity == b.severity
|
||||
});
|
||||
|
||||
// Downgrade severity for non-production paths unless opted out
|
||||
if !cfg.scanner.include_nonprod && is_nonprod_path(path) {
|
||||
for d in &mut out {
|
||||
d.severity = downgrade_severity(d.severity);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FusedResult {
|
||||
summaries,
|
||||
diags: out,
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unknown_extension_returns_empty() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -279,3 +536,65 @@ fn binary_file_guard_triggers() {
|
|||
let diags = run_rules_on_file(&bin, &Config::default(), None, None).unwrap();
|
||||
assert!(diags.is_empty(), "binary files are skipped");
|
||||
}
|
||||
|
||||
#[test]
fn nonprod_path_detection() {
    // Paths that must be recognised as non-production.
    let nonprod = [
        "project/tests/test_main.py",
        "src/__tests__/foo.js",
        "benches/bench.rs",
        "vendor/lib/foo.py",
        "src/build.rs",
        "dist/app.min.js",
        "examples/demo.py",
        "fixtures/data.json",
    ];
    for candidate in &nonprod {
        assert!(is_nonprod_path(Path::new(candidate)));
    }

    // Ordinary production paths must not match.
    let prod = ["src/main.rs", "lib/handler.py", "app/views.py"];
    for candidate in &prod {
        assert!(!is_nonprod_path(Path::new(candidate)));
    }
}
|
||||
|
||||
#[test]
fn severity_downgrade_works() {
    // (input, expected) pairs: one tier down, saturating at `Low`.
    let cases = [
        (Severity::High, Severity::Medium),
        (Severity::Medium, Severity::Low),
        (Severity::Low, Severity::Low),
    ];
    for (input, expected) in cases {
        assert_eq!(downgrade_severity(input), expected);
    }
}
|
||||
|
||||
#[test]
fn nonprod_path_downgrades_findings() {
    // Place a small Python file underneath a `tests` directory.
    let tmp = tempfile::tempdir().unwrap();
    let tests_dir = tmp.path().join("tests");
    std::fs::create_dir_all(&tests_dir).unwrap();
    let script = tests_dir.join("test_cmd.py");
    std::fs::write(
        &script,
        b"import os\ndef test():\n    cmd = os.environ['X']\n    os.system(cmd)\n",
    )
    .unwrap();

    // Default configuration: nothing reported for tests/ may remain HIGH.
    let diags = run_rules_on_file(&script, &Config::default(), None, None).unwrap();
    let high: Vec<_> = diags
        .iter()
        .filter(|d| d.severity == Severity::High)
        .collect();
    assert!(
        high.is_empty(),
        "Findings in tests/ should be downgraded from HIGH; got {:?}",
        high
    );

    // With `include_nonprod = true` the downgrade is skipped. The findings
    // themselves are not inspected here; the scan only has to succeed.
    let mut prod_cfg = Config::default();
    prod_cfg.scanner.include_nonprod = true;
    let _ = run_rules_on_file(&script, &prod_cfg, None, None).unwrap();
}
|
||||
|
|
|
|||
335
src/cfg.rs
335
src/cfg.rs
|
|
@ -3,7 +3,7 @@ use petgraph::prelude::*;
|
|||
use tracing::debug;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
use crate::labels::{Cap, DataLabel, Kind, classify, lookup, param_config};
|
||||
use crate::labels::{Cap, DataLabel, Kind, LangAnalysisRules, classify, lookup, param_config};
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::{FuncKey, Lang};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
|
@ -186,7 +186,12 @@ fn member_expr_text(n: Node, code: &[u8]) -> Option<String> {
|
|||
}
|
||||
|
||||
/// Recursively search `n` for a member expression whose text classifies as a label.
|
||||
fn first_member_label(n: Node, lang: &str, code: &[u8]) -> Option<DataLabel> {
|
||||
fn first_member_label(
|
||||
n: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> Option<DataLabel> {
|
||||
match n.kind() {
|
||||
"member_expression" | "attribute" | "selector_expression" => {
|
||||
if let Some(full) = member_expr_text(n, code) {
|
||||
|
|
@ -194,7 +199,7 @@ fn first_member_label(n: Node, lang: &str, code: &[u8]) -> Option<DataLabel> {
|
|||
// to match rules like "process.env" from "process.env.CMD".
|
||||
let mut candidate = full.as_str();
|
||||
loop {
|
||||
if let Some(lbl) = classify(lang, candidate) {
|
||||
if let Some(lbl) = classify(lang, candidate, extra_labels) {
|
||||
return Some(lbl);
|
||||
}
|
||||
match candidate.rsplit_once('.') {
|
||||
|
|
@ -208,7 +213,7 @@ fn first_member_label(n: Node, lang: &str, code: &[u8]) -> Option<DataLabel> {
|
|||
}
|
||||
let mut cursor = n.walk();
|
||||
for child in n.children(&mut cursor) {
|
||||
if let Some(lbl) = first_member_label(child, lang, code) {
|
||||
if let Some(lbl) = first_member_label(child, lang, code, extra_labels) {
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
|
|
@ -366,6 +371,7 @@ fn def_use(ast: Node, lang: &str, code: &[u8]) -> (Option<String>, Vec<String>)
|
|||
}
|
||||
|
||||
/// Create a node in one short borrow and optionally attach a taint label.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn push_node<'a>(
|
||||
g: &mut Cfg,
|
||||
kind: StmtKind,
|
||||
|
|
@ -374,6 +380,7 @@ fn push_node<'a>(
|
|||
code: &'a [u8],
|
||||
enclosing_func: Option<&str>,
|
||||
call_ordinal: u32,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> NodeIndex {
|
||||
/* ── 1. IDENTIFIER EXTRACTION ─────────────────────────────────────── */
|
||||
|
||||
|
|
@ -427,7 +434,8 @@ fn push_node<'a>(
|
|||
|
||||
/* ── 2. LABEL LOOK-UP ───────────────────────────────────────────── */
|
||||
|
||||
let mut label = classify(lang, &text);
|
||||
let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
|
||||
let mut label = classify(lang, &text, extra);
|
||||
|
||||
// For assignments like `element.innerHTML = value`, the inner-call heuristic
|
||||
// above may have overridden `text` with a call on the RHS (e.g. getElementById).
|
||||
|
|
@ -450,10 +458,20 @@ fn push_node<'a>(
|
|||
|
||||
if let Some(assign) = assign_node
|
||||
&& let Some(lhs) = assign.child_by_field_name("left")
|
||||
&& let Some(prop) = lhs.child_by_field_name("property")
|
||||
&& let Some(prop_text) = text_of(prop, code)
|
||||
{
|
||||
label = classify(lang, &prop_text);
|
||||
// Try full member expression first (e.g. "location.href") — more
|
||||
// specific and avoids false positives on `a.href`.
|
||||
if let Some(full) = member_expr_text(lhs, code) {
|
||||
label = classify(lang, &full, extra);
|
||||
}
|
||||
// Fall back to property-only (e.g. "innerHTML") for sinks that
|
||||
// don't need object context.
|
||||
if label.is_none()
|
||||
&& let Some(prop) = lhs.child_by_field_name("property")
|
||||
&& let Some(prop_text) = text_of(prop, code)
|
||||
{
|
||||
label = classify(lang, &prop_text, extra);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -466,7 +484,7 @@ fn push_node<'a>(
|
|||
lookup(lang, ast.kind()),
|
||||
Kind::CallWrapper | Kind::Assignment
|
||||
)
|
||||
&& let Some(found) = first_member_label(ast, lang, code)
|
||||
&& let Some(found) = first_member_label(ast, lang, code, extra)
|
||||
{
|
||||
label = Some(found);
|
||||
// Update text so the callee name reflects the source
|
||||
|
|
@ -564,6 +582,19 @@ fn extract_param_names<'a>(func_node: Node<'a>, lang: &str, code: &'a [u8]) -> V
|
|||
names
|
||||
}
|
||||
|
||||
/// Check if a callee name matches any configured terminator.
|
||||
fn is_configured_terminator(callee: &str, analysis_rules: Option<&LangAnalysisRules>) -> bool {
|
||||
if let Some(rules) = analysis_rules {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
rules
|
||||
.terminators
|
||||
.iter()
|
||||
.any(|t| callee_lower == t.to_ascii_lowercase())
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Add the same edge (of the same kind) from every node in `froms` to `to`.
|
||||
#[inline]
|
||||
fn connect_all(g: &mut Cfg, froms: &[NodeIndex], to: NodeIndex, kind: EdgeKind) {
|
||||
|
|
@ -588,6 +619,9 @@ fn build_sub<'a>(
|
|||
file_path: &str,
|
||||
enclosing_func: Option<&str>,
|
||||
call_ordinal: &mut u32,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
break_targets: &mut Vec<NodeIndex>,
|
||||
continue_targets: &mut Vec<NodeIndex>,
|
||||
) -> Vec<NodeIndex> {
|
||||
match lookup(lang, ast.kind()) {
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
|
@ -595,7 +629,16 @@ fn build_sub<'a>(
|
|||
// ─────────────────────────────────────────────────────────────────
|
||||
Kind::If => {
|
||||
// Condition node
|
||||
let cond = push_node(g, StmtKind::If, ast, lang, code, enclosing_func, 0);
|
||||
let cond = push_node(
|
||||
g,
|
||||
StmtKind::If,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, cond, EdgeKind::Seq);
|
||||
|
||||
// Locate then & else blocks using field-based lookup first,
|
||||
|
|
@ -620,6 +663,7 @@ fn build_sub<'a>(
|
|||
};
|
||||
|
||||
// THEN branch
|
||||
let then_first_node = NodeIndex::new(g.node_count());
|
||||
let then_exits = if let Some(b) = then_block {
|
||||
let exits = build_sub(
|
||||
b,
|
||||
|
|
@ -631,9 +675,17 @@ fn build_sub<'a>(
|
|||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
break_targets,
|
||||
continue_targets,
|
||||
);
|
||||
// True edges leave the condition
|
||||
if let Some(&first) = exits.first() {
|
||||
// Add True edge from condition to first node of then-branch.
|
||||
// We use the first node created (by index) rather than the
|
||||
// exit, because the branch may terminate (return/break) and
|
||||
// have no exits.
|
||||
if then_first_node.index() < g.node_count() {
|
||||
connect_all(g, &[cond], then_first_node, EdgeKind::True);
|
||||
} else if let Some(&first) = exits.first() {
|
||||
connect_all(g, &[cond], first, EdgeKind::True);
|
||||
}
|
||||
exits
|
||||
|
|
@ -642,6 +694,7 @@ fn build_sub<'a>(
|
|||
};
|
||||
|
||||
// ELSE branch
|
||||
let else_first_node = NodeIndex::new(g.node_count());
|
||||
let else_exits = if let Some(b) = else_block {
|
||||
let exits = build_sub(
|
||||
b,
|
||||
|
|
@ -653,17 +706,30 @@ fn build_sub<'a>(
|
|||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
break_targets,
|
||||
continue_targets,
|
||||
);
|
||||
if let Some(&first) = exits.first() {
|
||||
if else_first_node.index() < g.node_count() {
|
||||
connect_all(g, &[cond], else_first_node, EdgeKind::False);
|
||||
} else if let Some(&first) = exits.first() {
|
||||
connect_all(g, &[cond], first, EdgeKind::False);
|
||||
}
|
||||
exits
|
||||
} else {
|
||||
// No explicit else → non-taken branch flows to the *then* exits
|
||||
if let Some(&first) = then_exits.first() {
|
||||
connect_all(g, &[cond], first, EdgeKind::False);
|
||||
// No explicit else → if the then-branch falls through
|
||||
// (non-empty exits), the false branch merges with those exits.
|
||||
// If the then-branch terminates (break/return/continue →
|
||||
// empty exits), the false branch flows from the condition
|
||||
// to whatever comes next.
|
||||
if then_exits.is_empty() {
|
||||
vec![cond]
|
||||
} else {
|
||||
if let Some(&first) = then_exits.first() {
|
||||
connect_all(g, &[cond], first, EdgeKind::False);
|
||||
}
|
||||
then_exits.clone()
|
||||
}
|
||||
then_exits.clone()
|
||||
};
|
||||
|
||||
// Frontier = union of both branches
|
||||
|
|
@ -672,9 +738,22 @@ fn build_sub<'a>(
|
|||
|
||||
Kind::InfiniteLoop => {
|
||||
// Synthetic header node
|
||||
let header = push_node(g, StmtKind::Loop, ast, lang, code, enclosing_func, 0);
|
||||
let header = push_node(
|
||||
g,
|
||||
StmtKind::Loop,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, header, EdgeKind::Seq);
|
||||
|
||||
// Fresh break/continue targets scoped to this loop
|
||||
let mut loop_breaks = Vec::new();
|
||||
let mut loop_continues = Vec::new();
|
||||
|
||||
// The body is the single `block` child
|
||||
let body = ast.child_by_field_name("body").expect("loop without body");
|
||||
let body_exits = build_sub(
|
||||
|
|
@ -687,23 +766,49 @@ fn build_sub<'a>(
|
|||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
&mut loop_breaks,
|
||||
&mut loop_continues,
|
||||
);
|
||||
|
||||
// Back-edge from every linear exit to header
|
||||
for &e in &body_exits {
|
||||
connect_all(g, &[e], header, EdgeKind::Back);
|
||||
}
|
||||
// `loop` may break → those exits are frontiers too
|
||||
body_exits.into_iter().chain([header]).collect()
|
||||
// Wire continue targets as back edges to header
|
||||
for &c in &loop_continues {
|
||||
connect_all(g, &[c], header, EdgeKind::Back);
|
||||
}
|
||||
// Break targets become exits of the loop
|
||||
if loop_breaks.is_empty() {
|
||||
// No break → infinite loop; header is the only exit for
|
||||
// downstream code (fallthrough semantics)
|
||||
vec![header]
|
||||
} else {
|
||||
loop_breaks
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// WHILE / FOR: classic loop with a back edge.
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
Kind::While | Kind::For => {
|
||||
let header = push_node(g, StmtKind::Loop, ast, lang, code, enclosing_func, 0);
|
||||
let header = push_node(
|
||||
g,
|
||||
StmtKind::Loop,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, header, EdgeKind::Seq);
|
||||
|
||||
// Fresh break/continue targets scoped to this loop
|
||||
let mut loop_breaks = Vec::new();
|
||||
let mut loop_continues = Vec::new();
|
||||
|
||||
// Body = first (and usually only) block child.
|
||||
let body = ast
|
||||
.child_by_field_name("body")
|
||||
|
|
@ -724,14 +829,24 @@ fn build_sub<'a>(
|
|||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
&mut loop_breaks,
|
||||
&mut loop_continues,
|
||||
);
|
||||
|
||||
// Back‑edge for every linear exit → header.
|
||||
for &e in &body_exits {
|
||||
connect_all(g, &[e], header, EdgeKind::Back);
|
||||
}
|
||||
// Falling out of the loop = header’s false branch.
|
||||
vec![header]
|
||||
// Wire continue targets as back edges to header
|
||||
for &c in &loop_continues {
|
||||
connect_all(g, &[c], header, EdgeKind::Back);
|
||||
}
|
||||
// Falling out of the loop = header’s false branch +
|
||||
// any break targets that exit the loop.
|
||||
let mut exits = vec![header];
|
||||
exits.extend(loop_breaks);
|
||||
exits
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
|
@ -743,25 +858,72 @@ fn build_sub<'a>(
|
|||
// that callee labels (source/sanitizer/sink) are applied.
|
||||
let ord = *call_ordinal;
|
||||
*call_ordinal += 1;
|
||||
let call_idx = push_node(g, StmtKind::Call, ast, lang, code, enclosing_func, ord);
|
||||
let call_idx = push_node(
|
||||
g,
|
||||
StmtKind::Call,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
ord,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, call_idx, EdgeKind::Seq);
|
||||
let ret = push_node(g, StmtKind::Return, ast, lang, code, enclosing_func, 0);
|
||||
let ret = push_node(
|
||||
g,
|
||||
StmtKind::Return,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, &[call_idx], ret, EdgeKind::Seq);
|
||||
Vec::new()
|
||||
} else {
|
||||
let ret = push_node(g, StmtKind::Return, ast, lang, code, enclosing_func, 0);
|
||||
let ret = push_node(
|
||||
g,
|
||||
StmtKind::Return,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, ret, EdgeKind::Seq);
|
||||
Vec::new() // terminates this path
|
||||
}
|
||||
}
|
||||
Kind::Break => {
|
||||
let brk = push_node(g, StmtKind::Break, ast, lang, code, enclosing_func, 0);
|
||||
let brk = push_node(
|
||||
g,
|
||||
StmtKind::Break,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, brk, EdgeKind::Seq);
|
||||
break_targets.push(brk);
|
||||
Vec::new()
|
||||
}
|
||||
Kind::Continue => {
|
||||
let cont = push_node(g, StmtKind::Continue, ast, lang, code, enclosing_func, 0);
|
||||
let cont = push_node(
|
||||
g,
|
||||
StmtKind::Continue,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, cont, EdgeKind::Seq);
|
||||
continue_targets.push(cont);
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
|
|
@ -774,6 +936,7 @@ fn build_sub<'a>(
|
|||
// Track the last frontier before a function emptied it — used to
|
||||
// keep subsequent functions reachable.
|
||||
let mut last_live_frontier = preds.to_vec();
|
||||
let mut prev_was_preproc = false;
|
||||
for child in ast.children(&mut cursor) {
|
||||
let child_is_fn = lookup(lang, child.kind()) == Kind::Function;
|
||||
|
||||
|
|
@ -782,7 +945,13 @@ fn build_sub<'a>(
|
|||
// file-level predecessors. Without this, a preceding function
|
||||
// that ends with `return` (frontier = []) would leave subsequent
|
||||
// functions disconnected from the graph.
|
||||
let child_preds = if child_is_fn && frontier.is_empty() {
|
||||
//
|
||||
// Similarly, when a preprocessor block (`#ifdef ... #endif`)
|
||||
// contains an `if/else` whose else branch is on the other side
|
||||
// of the `#endif`, tree-sitter parses a dangling else that
|
||||
// empties the frontier. The code after the preproc block should
|
||||
// remain reachable.
|
||||
let child_preds = if frontier.is_empty() && (child_is_fn || prev_was_preproc) {
|
||||
last_live_frontier.clone()
|
||||
} else {
|
||||
frontier.clone()
|
||||
|
|
@ -798,12 +967,17 @@ fn build_sub<'a>(
|
|||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
break_targets,
|
||||
continue_targets,
|
||||
);
|
||||
|
||||
let is_preproc = child.kind().starts_with("preproc_");
|
||||
if !child_exits.is_empty() {
|
||||
last_live_frontier = child_exits.clone();
|
||||
}
|
||||
frontier = child_exits;
|
||||
prev_was_preproc = is_preproc;
|
||||
}
|
||||
frontier
|
||||
}
|
||||
|
|
@ -822,7 +996,16 @@ fn build_sub<'a>(
|
|||
tmp.into_iter().next()
|
||||
})
|
||||
.unwrap_or_else(|| "<anon>".to_string());
|
||||
let entry_idx = push_node(g, StmtKind::Seq, ast, lang, code, Some(&fn_name), 0);
|
||||
let entry_idx = push_node(
|
||||
g,
|
||||
StmtKind::Seq,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
Some(&fn_name),
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, entry_idx, EdgeKind::Seq);
|
||||
|
||||
// 1b) extract parameter names
|
||||
|
|
@ -830,8 +1013,13 @@ fn build_sub<'a>(
|
|||
let param_count = param_names.len();
|
||||
|
||||
// 2) build its body with a fresh call ordinal counter for this function scope
|
||||
// Snapshot the current node count so we can iterate only over nodes
|
||||
// created within this function (avoids O(N²) scan of the full graph).
|
||||
let fn_first_node: NodeIndex = NodeIndex::new(g.node_count());
|
||||
let body = ast.child_by_field_name("body").expect("fn w/o body");
|
||||
let mut fn_call_ordinal: u32 = 0;
|
||||
let mut fn_breaks = Vec::new();
|
||||
let mut fn_continues = Vec::new();
|
||||
let body_exits = build_sub(
|
||||
body,
|
||||
&[entry_idx],
|
||||
|
|
@ -842,6 +1030,9 @@ fn build_sub<'a>(
|
|||
file_path,
|
||||
Some(&fn_name),
|
||||
&mut fn_call_ordinal,
|
||||
analysis_rules,
|
||||
&mut fn_breaks,
|
||||
&mut fn_continues,
|
||||
);
|
||||
|
||||
// ───── 3) light-weight dataflow ──────────────────────────────────────
|
||||
|
|
@ -862,11 +1053,12 @@ fn build_sub<'a>(
|
|||
|
||||
let param_set: HashSet<&str> = param_names.iter().map(|s| s.as_str()).collect();
|
||||
|
||||
for idx in g.node_indices() {
|
||||
// Iterate only over nodes created within this function scope
|
||||
// (entry_idx .. current end) instead of the entire graph.
|
||||
let fn_node_range = entry_idx.index()..g.node_count();
|
||||
for raw in fn_node_range {
|
||||
let idx = NodeIndex::new(raw);
|
||||
let info = &g[idx];
|
||||
if info.span.0 < ast.start_byte() || info.span.1 > ast.end_byte() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// collect callee names
|
||||
if let Some(callee) = &info.callee
|
||||
|
|
@ -1010,11 +1202,12 @@ fn build_sub<'a>(
|
|||
// this edge, the synthetic exit node is unreachable whenever
|
||||
// the function body ends with a `return` statement, which
|
||||
// disconnects all subsequent functions at the module level.
|
||||
for idx in g.node_indices() {
|
||||
//
|
||||
// Only scan nodes created within this function scope.
|
||||
for raw in fn_first_node.index()..g.node_count() {
|
||||
let idx = NodeIndex::new(raw);
|
||||
let info = &g[idx];
|
||||
if info.kind == StmtKind::Return
|
||||
&& info.span.0 >= ast.start_byte()
|
||||
&& info.span.1 <= ast.end_byte()
|
||||
&& idx != exit_idx
|
||||
&& !g.contains_edge(idx, exit_idx)
|
||||
{
|
||||
|
|
@ -1068,6 +1261,9 @@ fn build_sub<'a>(
|
|||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
break_targets,
|
||||
continue_targets,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1085,8 +1281,25 @@ fn build_sub<'a>(
|
|||
} else {
|
||||
0
|
||||
};
|
||||
let node = push_node(g, kind, ast, lang, code, enclosing_func, ord);
|
||||
let node = push_node(
|
||||
g,
|
||||
kind,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
ord,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, node, EdgeKind::Seq);
|
||||
|
||||
// If the callee is a configured terminator, treat as a dead end
|
||||
if kind == StmtKind::Call
|
||||
&& let Some(callee) = &g[node].callee
|
||||
&& is_configured_terminator(callee, analysis_rules)
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
vec![node]
|
||||
}
|
||||
|
||||
|
|
@ -1095,8 +1308,24 @@ fn build_sub<'a>(
|
|||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
|
||||
let ord = *call_ordinal;
|
||||
*call_ordinal += 1;
|
||||
let n = push_node(g, StmtKind::Call, ast, lang, code, enclosing_func, ord);
|
||||
let n = push_node(
|
||||
g,
|
||||
StmtKind::Call,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
ord,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, n, EdgeKind::Seq);
|
||||
|
||||
// If the callee is a configured terminator, treat as a dead end
|
||||
if let Some(callee) = &g[n].callee
|
||||
&& is_configured_terminator(callee, analysis_rules)
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
vec![n]
|
||||
}
|
||||
|
||||
|
|
@ -1115,7 +1344,16 @@ fn build_sub<'a>(
|
|||
} else {
|
||||
0
|
||||
};
|
||||
let n = push_node(g, kind, ast, lang, code, enclosing_func, ord);
|
||||
let n = push_node(
|
||||
g,
|
||||
kind,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
ord,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, n, EdgeKind::Seq);
|
||||
vec![n]
|
||||
}
|
||||
|
|
@ -1127,7 +1365,16 @@ fn build_sub<'a>(
|
|||
// Every other node = simple sequential statement
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
_ => {
|
||||
let n = push_node(g, StmtKind::Seq, ast, lang, code, enclosing_func, 0);
|
||||
let n = push_node(
|
||||
g,
|
||||
StmtKind::Seq,
|
||||
ast,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, n, EdgeKind::Seq);
|
||||
vec![n]
|
||||
}
|
||||
|
|
@ -1150,6 +1397,7 @@ pub(crate) fn build_cfg<'a>(
|
|||
code: &'a [u8],
|
||||
lang: &str,
|
||||
file_path: &str,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> (Cfg, NodeIndex, FuncSummaries) {
|
||||
debug!(target: "cfg", "Building CFG for {:?}", tree.root_node());
|
||||
|
||||
|
|
@ -1178,6 +1426,8 @@ pub(crate) fn build_cfg<'a>(
|
|||
|
||||
// Build the body below the synthetic ENTRY.
|
||||
let mut top_ordinal: u32 = 0;
|
||||
let mut top_breaks = Vec::new();
|
||||
let mut top_continues = Vec::new();
|
||||
let exits = build_sub(
|
||||
tree.root_node(),
|
||||
&[entry],
|
||||
|
|
@ -1188,6 +1438,9 @@ pub(crate) fn build_cfg<'a>(
|
|||
file_path,
|
||||
None,
|
||||
&mut top_ordinal,
|
||||
analysis_rules,
|
||||
&mut top_breaks,
|
||||
&mut top_continues,
|
||||
);
|
||||
debug!(target: "cfg", "exits: {:?}", exits);
|
||||
// Wire every real exit to our synthetic EXIT node.
|
||||
|
|
|
|||
|
|
@ -2,15 +2,75 @@ use super::dominators::{self, dominates};
|
|||
use super::rules;
|
||||
use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_entry_point_func};
|
||||
use crate::cfg::StmtKind;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::labels::{Cap, DataLabel, RuntimeLabelRule};
|
||||
use crate::patterns::Severity;
|
||||
use petgraph::graph::NodeIndex;
|
||||
|
||||
pub struct UnguardedSink;
|
||||
|
||||
/// Check whether **all** arguments to the sink are constants (no taint-capable
|
||||
/// variable flows). Extends the inline callee-part check by tracing one hop
|
||||
/// through the CFG: if a used variable is defined by a node that itself has
|
||||
/// empty `uses` and no Source label, the definition is treated as a constant
|
||||
/// binding (e.g. `let cmd = "git"; Command::new(cmd)`).
|
||||
fn is_all_args_constant(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
|
||||
let sink_info = &ctx.cfg[sink];
|
||||
let callee_desc = sink_info.callee.as_deref().unwrap_or("");
|
||||
let callee_parts: Vec<&str> = callee_desc.split(['.', ':']).collect();
|
||||
let sink_func = sink_info.enclosing_func.as_deref();
|
||||
|
||||
sink_info.uses.iter().all(|u| {
|
||||
// Part of the callee name itself → constant
|
||||
if callee_parts.contains(&u.as_str()) {
|
||||
return true;
|
||||
}
|
||||
// One-hop trace: find the defining node in the same function
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
let info = &ctx.cfg[idx];
|
||||
if info.enclosing_func.as_deref() != sink_func {
|
||||
continue;
|
||||
}
|
||||
if info.defines.as_deref() == Some(u.as_str()) {
|
||||
// If the defining node has no uses (pure constant) and is not
|
||||
// a Source, the variable is constant.
|
||||
if info.uses.is_empty() && !matches!(info.label, Some(DataLabel::Source(_))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
})
|
||||
}
|
||||
|
||||
/// Check if a callee matches any of the runtime label rules that are sanitizers.
|
||||
fn match_config_sanitizer(callee: &str, extra: &[RuntimeLabelRule]) -> Option<Cap> {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in extra {
|
||||
let cap = match rule.label {
|
||||
DataLabel::Sanitizer(c) => c,
|
||||
_ => continue,
|
||||
};
|
||||
for m in &rule.matchers {
|
||||
let ml = m.to_ascii_lowercase();
|
||||
if ml.ends_with('_') {
|
||||
if callee_lower.starts_with(&ml) {
|
||||
return Some(cap);
|
||||
}
|
||||
} else if callee_lower.ends_with(&ml) {
|
||||
return Some(cap);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Find all nodes in the CFG that are calls to guard functions.
|
||||
fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
|
||||
let guard_rules = rules::guard_rules(ctx.lang);
|
||||
let config_rules = ctx
|
||||
.analysis_rules
|
||||
.map(|r| r.extra_labels.as_slice())
|
||||
.unwrap_or(&[]);
|
||||
let mut result = Vec::new();
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
|
|
@ -19,6 +79,13 @@ fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
|
|||
continue;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
// Check config sanitizer rules first
|
||||
if let Some(cap) = match_config_sanitizer(callee, config_rules) {
|
||||
result.push((idx, cap));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Then check built-in guard rules
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in guard_rules {
|
||||
let matched = rule.matchers.iter().any(|m| {
|
||||
|
|
@ -174,6 +241,13 @@ impl CfgAnalysis for UnguardedSink {
|
|||
|
||||
let has_taint = taint_confirms_sink(ctx, *sink);
|
||||
let source_derived = sink_arg_is_source_derived(ctx, *sink);
|
||||
|
||||
// If sink args are all constants (including one-hop constant bindings)
|
||||
// and taint didn't confirm, this is a false positive — skip it.
|
||||
if is_all_args_constant(ctx, *sink) && !has_taint && !source_derived {
|
||||
continue;
|
||||
}
|
||||
|
||||
let param_only = sink_arg_is_parameter_only(ctx, *sink);
|
||||
let in_entrypoint = sink_in_entrypoint(ctx, *sink);
|
||||
|
||||
|
|
@ -183,6 +257,9 @@ impl CfgAnalysis for UnguardedSink {
|
|||
} else if param_only && !in_entrypoint {
|
||||
// Wrapper function consuming only parameters → LOW
|
||||
(Severity::Low, Confidence::Low)
|
||||
} else if !ctx.taint_active && !source_derived {
|
||||
// CFG-only mode without taint confirmation → LOW
|
||||
(Severity::Low, Confidence::Low)
|
||||
} else if in_entrypoint && !param_only {
|
||||
// Entrypoint with non-parameter args but no taint confirmation → MEDIUM
|
||||
(Severity::Medium, Confidence::Medium)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ mod tests;
|
|||
pub mod unreachable;
|
||||
|
||||
use crate::cfg::{FuncSummaries, NodeInfo, StmtKind};
|
||||
use crate::labels::DataLabel;
|
||||
use crate::labels::{DataLabel, LangAnalysisRules};
|
||||
use crate::patterns::Severity;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
|
|
@ -51,6 +51,11 @@ pub struct AnalysisContext<'a> {
|
|||
#[allow(dead_code)]
|
||||
pub global_summaries: Option<&'a GlobalSummaries>,
|
||||
pub taint_findings: &'a [taint::Finding],
|
||||
pub analysis_rules: Option<&'a LangAnalysisRules>,
|
||||
/// Whether full taint analysis was active for this file (global summaries
|
||||
/// existed and taint engine ran). When false, structural findings without
|
||||
/// taint confirmation should be treated with lower confidence.
|
||||
pub taint_active: bool,
|
||||
}
|
||||
|
||||
pub trait CfgAnalysis {
|
||||
|
|
@ -87,6 +92,20 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
|||
true
|
||||
});
|
||||
|
||||
// ── Dedup: suppress cfg-unguarded-sink when cfg-unreachable-sink covers the span ──
|
||||
let unreachable_spans: HashSet<(usize, usize)> = findings
|
||||
.iter()
|
||||
.filter(|f| f.rule_id == "cfg-unreachable-sink")
|
||||
.map(|f| f.span)
|
||||
.collect();
|
||||
|
||||
findings.retain(|f| {
|
||||
if f.rule_id == "cfg-unguarded-sink" && unreachable_spans.contains(&f.span) {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
scoring::score_findings(&mut findings, ctx);
|
||||
findings.sort_by(|a, b| {
|
||||
b.score
|
||||
|
|
@ -97,11 +116,36 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
|||
}
|
||||
|
||||
/// Helper: check whether a node is a guard call (validate, sanitize, check, etc.).
|
||||
pub(crate) fn is_guard_call(info: &NodeInfo, lang: Lang) -> bool {
|
||||
pub(crate) fn is_guard_call(
|
||||
info: &NodeInfo,
|
||||
lang: Lang,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> bool {
|
||||
if info.kind != StmtKind::Call {
|
||||
return false;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
// Check config sanitizer rules
|
||||
if let Some(extras) = analysis_rules {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in &extras.extra_labels {
|
||||
if !matches!(rule.label, DataLabel::Sanitizer(_)) {
|
||||
continue;
|
||||
}
|
||||
for m in &rule.matchers {
|
||||
let ml = m.to_ascii_lowercase();
|
||||
if ml.ends_with('_') {
|
||||
if callee_lower.starts_with(&ml) {
|
||||
return true;
|
||||
}
|
||||
} else if callee_lower.ends_with(&ml) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check built-in guard rules
|
||||
let guard_rules = rules::guard_rules(lang);
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in guard_rules {
|
||||
|
|
|
|||
|
|
@ -8,8 +8,13 @@ use std::collections::HashSet;
|
|||
|
||||
pub struct ResourceMisuse;
|
||||
|
||||
/// Find nodes matching acquire patterns for a given resource pair.
|
||||
fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<NodeIndex> {
|
||||
/// Find nodes matching acquire patterns for a given resource pair,
|
||||
/// excluding any that match `exclude_patterns`.
|
||||
fn find_acquire_nodes(
|
||||
ctx: &AnalysisContext,
|
||||
acquire_patterns: &[&str],
|
||||
exclude_patterns: &[&str],
|
||||
) -> Vec<NodeIndex> {
|
||||
ctx.cfg
|
||||
.node_indices()
|
||||
.filter(|&idx| {
|
||||
|
|
@ -19,6 +24,16 @@ fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<N
|
|||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
// Check exclusions first — if the callee matches an exclude
|
||||
// pattern, it is NOT an acquire even if it also matches an
|
||||
// acquire pattern (e.g. `freopen` ends with `fopen`).
|
||||
let excluded = exclude_patterns.iter().any(|p| {
|
||||
let pl = p.to_ascii_lowercase();
|
||||
callee_lower.ends_with(&pl) || callee_lower == pl
|
||||
});
|
||||
if excluded {
|
||||
return false;
|
||||
}
|
||||
acquire_patterns.iter().any(|p| {
|
||||
let pl = p.to_ascii_lowercase();
|
||||
callee_lower.ends_with(&pl) || callee_lower == pl
|
||||
|
|
@ -113,6 +128,204 @@ fn all_paths_pass_through(
|
|||
true
|
||||
}
|
||||
|
||||
/// Check whether the acquired variable is stored into a struct field (ownership
|
||||
/// transfer) downstream of the acquire node. Patterns recognised:
|
||||
/// - `ptr->field = var` (C arrow operator)
|
||||
/// - `obj.field = var` (C dot / generic field store)
|
||||
/// - `list->next = ...` (linked-list insertion)
|
||||
///
|
||||
/// If the variable is transferred, there is no leak — the receiving struct is
|
||||
/// responsible for the lifetime.
|
||||
fn is_ownership_transferred(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
|
||||
let acquired_var = match &ctx.cfg[acquire].defines {
|
||||
Some(v) => v.clone(),
|
||||
None => return false,
|
||||
};
|
||||
|
||||
// BFS through CFG successors looking for a node whose span text
|
||||
// mentions the acquired variable in a struct-field store context.
|
||||
use std::collections::VecDeque;
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
for succ in ctx.cfg.neighbors(acquire) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let info = &ctx.cfg[node];
|
||||
let (start, end) = info.span;
|
||||
|
||||
// Check the source text at this node's span for the acquired variable
|
||||
// appearing in a struct-field store context.
|
||||
let references_var = info.uses.iter().any(|u| u == &acquired_var)
|
||||
|| info.defines.as_ref().is_some_and(|d| d == &acquired_var);
|
||||
|
||||
if references_var && start < end && end <= ctx.source_bytes.len() {
|
||||
let span_text = &ctx.source_bytes[start..end];
|
||||
// `->` anywhere in span means pointer-to-member store
|
||||
if span_text.windows(2).any(|w| w == b"->") {
|
||||
return true;
|
||||
}
|
||||
// `.field = var` pattern (but not `==`)
|
||||
if has_dot_field_assignment(span_text) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// If the variable is truly redefined (not a field write), stop
|
||||
// following this path. A true redefinition is when `defines` matches
|
||||
// but the span doesn't contain `->` or `.field =` patterns.
|
||||
if info.defines.as_ref().is_some_and(|d| d == &acquired_var) {
|
||||
let is_field_write = if start < end && end <= ctx.source_bytes.len() {
|
||||
let span_text = &ctx.source_bytes[start..end];
|
||||
span_text.windows(2).any(|w| w == b"->") || has_dot_field_assignment(span_text)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
if !is_field_write {
|
||||
continue; // genuine redefinition — stop this path
|
||||
}
|
||||
}
|
||||
|
||||
for succ in ctx.cfg.neighbors(node) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Check if `span_text` contains a dot-field assignment pattern like
/// `obj.field = var`.
///
/// A match is a `.` immediately followed by a non-empty run of identifier
/// bytes (`[A-Za-z0-9_]`), optional ASCII whitespace, and a single `=`.
///
/// Deliberately rejected:
///   - `a.b == c`      — comparison, not a store
///   - `a.b => c`      — fat arrow / hash rocket, not a store
///   - `$s .= $t`      — no field name after the dot (PHP concat-assign)
///   - `obj.method(x)` — no `=` after the name
fn has_dot_field_assignment(span_text: &[u8]) -> bool {
    span_text
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'.')
        .any(|(dot, _)| {
            let tail = &span_text[dot + 1..];

            // The field name must be present; a bare `.=` is not a field store.
            let name_len = tail
                .iter()
                .take_while(|b| b.is_ascii_alphanumeric() || **b == b'_')
                .count();
            if name_len == 0 {
                return false;
            }

            let mut rest = tail[name_len..]
                .iter()
                .copied()
                .skip_while(u8::is_ascii_whitespace);

            // Exactly one `=`: neither `==` nor `=>` counts as an assignment.
            rest.next() == Some(b'=') && !matches!(rest.next(), Some(b'=' | b'>'))
        })
}
|
||||
|
||||
/// Check whether the acquired variable is consumed by an ownership-taking
|
||||
/// function (e.g. `FileResponse(f)`, `send_file(f)`) downstream of the
|
||||
/// acquire node. These functions take ownership of the file handle so there
|
||||
/// is no leak.
|
||||
fn is_consumed_by_owner(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
|
||||
static CONSUMING_SINKS: &[&str] = &[
|
||||
"fileresponse",
|
||||
"streaminghttpresponse",
|
||||
"send_file",
|
||||
"make_response",
|
||||
];
|
||||
|
||||
let acquired_var = match &ctx.cfg[acquire].defines {
|
||||
Some(v) => v.clone(),
|
||||
None => return false,
|
||||
};
|
||||
|
||||
use std::collections::VecDeque;
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
for succ in ctx.cfg.neighbors(acquire) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let info = &ctx.cfg[node];
|
||||
|
||||
// Check Call nodes with callee that matches a consuming sink
|
||||
if info.kind == StmtKind::Call
|
||||
&& let Some(callee) = &info.callee
|
||||
{
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
let is_consuming = CONSUMING_SINKS.iter().any(|s| callee_lower.ends_with(s));
|
||||
if is_consuming && info.uses.iter().any(|u| u == &acquired_var) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Also check the span text for consuming calls — handles cases where
|
||||
// the call is embedded in a return statement (e.g. `return FileResponse(f)`)
|
||||
if info.uses.iter().any(|u| u == &acquired_var) {
|
||||
let (start, end) = info.span;
|
||||
if start < end && end <= ctx.source_bytes.len() {
|
||||
let span_lower: Vec<u8> = ctx.source_bytes[start..end]
|
||||
.iter()
|
||||
.map(|b| b.to_ascii_lowercase())
|
||||
.collect();
|
||||
if CONSUMING_SINKS
|
||||
.iter()
|
||||
.any(|s| span_lower.windows(s.len()).any(|w| w == s.as_bytes()))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for succ in ctx.cfg.neighbors(node) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// For mutex pairs, check that an explicit `.acquire()` or `.lock()` call
|
||||
/// exists on the acquired variable in the CFG. If only the constructor
|
||||
/// (e.g. `threading.Lock()`) is observed without acquire, skip the finding.
|
||||
fn has_explicit_lock_acquire(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
|
||||
let acquired_var = match &ctx.cfg[acquire].defines {
|
||||
Some(v) => v.clone(),
|
||||
None => return false,
|
||||
};
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
let info = &ctx.cfg[idx];
|
||||
if info.kind != StmtKind::Call {
|
||||
continue;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
let is_lock_call = callee_lower.ends_with(".acquire")
|
||||
|| callee_lower.ends_with(".lock")
|
||||
|| callee_lower == "pthread_mutex_lock";
|
||||
if is_lock_call && info.uses.iter().any(|u| u == &acquired_var) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
impl CfgAnalysis for ResourceMisuse {
|
||||
fn name(&self) -> &'static str {
|
||||
"resource-misuse"
|
||||
|
|
@ -128,11 +341,18 @@ impl CfgAnalysis for ResourceMisuse {
|
|||
let mut findings = Vec::new();
|
||||
|
||||
for pair in pairs {
|
||||
let acquire_nodes = find_acquire_nodes(ctx, pair.acquire);
|
||||
let acquire_nodes = find_acquire_nodes(ctx, pair.acquire, pair.exclude_acquire);
|
||||
let release_nodes = find_release_nodes(ctx, pair.release);
|
||||
|
||||
for &acquire in &acquire_nodes {
|
||||
if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit) {
|
||||
if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit)
|
||||
&& !is_ownership_transferred(ctx, acquire)
|
||||
&& !is_consumed_by_owner(ctx, acquire)
|
||||
{
|
||||
// For mutex pairs, require an explicit .acquire()/.lock() call
|
||||
if pair.resource_name == "mutex" && !has_explicit_lock_acquire(ctx, acquire) {
|
||||
continue;
|
||||
}
|
||||
let info = &ctx.cfg[acquire];
|
||||
let callee_desc = info.callee.as_deref().unwrap_or("(acquire)");
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,9 @@ pub struct EntryPointRule {
|
|||
pub struct ResourcePair {
    /// Callee patterns that mark a call as acquiring the resource.
    pub acquire: &'static [&'static str],
    /// Callee patterns that mark a call as releasing the resource.
    pub release: &'static [&'static str],
    /// Callee patterns that would match an `acquire` pattern (e.g. `freopen`
    /// ends with `fopen`) but must NOT be treated as acquisitions.
    pub exclude_acquire: &'static [&'static str],
    /// Name of the resource kind, e.g. `"file handle"` or `"mutex"`.
    pub resource_name: &'static str,
}
|
||||
|
||||
|
|
@ -47,6 +50,16 @@ static COMMON_GUARDS: &[GuardRule] = &[
|
|||
matchers: &["url_encode", "encode_uri", "urlencode"],
|
||||
applies_to_sink_caps: Cap::URL_ENCODE,
|
||||
},
|
||||
GuardRule {
|
||||
matchers: &[
|
||||
"which",
|
||||
"resolve_binary",
|
||||
"find_program",
|
||||
"lookup_path",
|
||||
"shutil.which",
|
||||
],
|
||||
applies_to_sink_caps: Cap::SHELL_ESCAPE,
|
||||
},
|
||||
];
|
||||
|
||||
pub fn guard_rules(_lang: Lang) -> &'static [GuardRule] {
|
||||
|
|
@ -168,21 +181,25 @@ static C_RESOURCES: &[ResourcePair] = &[
|
|||
ResourcePair {
|
||||
acquire: &["malloc", "calloc", "realloc"],
|
||||
release: &["free"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "memory",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["fopen"],
|
||||
release: &["fclose"],
|
||||
acquire: &["fopen", "fdopen", "curlx_fopen", "curlx_fdopen"],
|
||||
release: &["fclose", "curlx_fclose"],
|
||||
exclude_acquire: &["freopen", "curlx_freopen"],
|
||||
resource_name: "file handle",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["open"],
|
||||
release: &["close"],
|
||||
exclude_acquire: &["freopen", "curlx_freopen"],
|
||||
resource_name: "file descriptor",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["pthread_mutex_lock"],
|
||||
release: &["pthread_mutex_unlock"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "mutex",
|
||||
},
|
||||
];
|
||||
|
|
@ -191,11 +208,13 @@ static GO_RESOURCES: &[ResourcePair] = &[
|
|||
ResourcePair {
|
||||
acquire: &["os.Open", "os.Create", "os.OpenFile"],
|
||||
release: &[".Close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "file handle",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &[".Lock"],
|
||||
release: &[".Unlock"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "mutex",
|
||||
},
|
||||
];
|
||||
|
|
@ -205,6 +224,7 @@ static RUST_RESOURCES: &[ResourcePair] = &[
|
|||
ResourcePair {
|
||||
acquire: &["alloc"],
|
||||
release: &["dealloc"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "raw memory",
|
||||
},
|
||||
];
|
||||
|
|
@ -217,10 +237,93 @@ static JAVA_RESOURCES: &[ResourcePair] = &[ResourcePair {
|
|||
"openConnection",
|
||||
],
|
||||
release: &[".close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "stream/connection",
|
||||
}];
|
||||
|
||||
static EMPTY_RESOURCES: &[ResourcePair] = &[];
|
||||
static PYTHON_RESOURCES: &[ResourcePair] = &[
|
||||
ResourcePair {
|
||||
acquire: &["open"],
|
||||
release: &[".close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "file handle",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["socket.socket", "socket"],
|
||||
release: &[".close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "socket",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["connect", "cursor"],
|
||||
release: &[".close"],
|
||||
exclude_acquire: &["signal.connect", "event.connect", ".register"],
|
||||
resource_name: "db connection",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["threading.Lock", "threading.RLock"],
|
||||
release: &[".release"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "mutex",
|
||||
},
|
||||
];
|
||||
|
||||
static RUBY_RESOURCES: &[ResourcePair] = &[
|
||||
ResourcePair {
|
||||
acquire: &["File.open", "open"],
|
||||
release: &[".close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "file handle",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["TCPSocket.new", "UDPSocket.new"],
|
||||
release: &[".close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "socket",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &[".lock"],
|
||||
release: &[".unlock"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "mutex",
|
||||
},
|
||||
];
|
||||
|
||||
static PHP_RESOURCES: &[ResourcePair] = &[
|
||||
ResourcePair {
|
||||
acquire: &["fopen"],
|
||||
release: &["fclose"],
|
||||
exclude_acquire: &["freopen"],
|
||||
resource_name: "file handle",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["mysqli_connect"],
|
||||
release: &["mysqli_close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "db connection",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["curl_init"],
|
||||
release: &["curl_close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "curl handle",
|
||||
},
|
||||
];
|
||||
|
||||
static JS_RESOURCES: &[ResourcePair] = &[
|
||||
ResourcePair {
|
||||
acquire: &["fs.open", "fs.openSync"],
|
||||
release: &["fs.close", "fs.closeSync"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "file descriptor",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["createReadStream", "createWriteStream"],
|
||||
release: &[".close", ".destroy"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "stream",
|
||||
},
|
||||
];
|
||||
|
||||
pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
|
||||
match lang {
|
||||
|
|
@ -229,6 +332,9 @@ pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
|
|||
Lang::Go => GO_RESOURCES,
|
||||
Lang::Rust => RUST_RESOURCES,
|
||||
Lang::Java => JAVA_RESOURCES,
|
||||
_ => EMPTY_RESOURCES,
|
||||
Lang::Python => PYTHON_RESOURCES,
|
||||
Lang::Ruby => RUBY_RESOURCES,
|
||||
Lang::Php => PHP_RESOURCES,
|
||||
Lang::JavaScript | Lang::TypeScript => JS_RESOURCES,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
|
|||
let mut parser = tree_sitter::Parser::new();
|
||||
parser.set_language(&ts_lang).unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
|
||||
let lang = Lang::from_slug(lang_str).unwrap();
|
||||
let ctx = AnalysisContext {
|
||||
cfg: &cfg,
|
||||
|
|
@ -25,6 +25,8 @@ fn parse_and_analyse<A: CfgAnalysis>(
|
|||
func_summaries: &summaries,
|
||||
global_summaries: None,
|
||||
taint_findings: &[],
|
||||
analysis_rules: None,
|
||||
taint_active: true,
|
||||
};
|
||||
analysis.run(&ctx)
|
||||
}
|
||||
|
|
@ -34,7 +36,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
|
|||
let mut parser = tree_sitter::Parser::new();
|
||||
parser.set_language(&ts_lang).unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
|
||||
let lang = Lang::from_slug(lang_str).unwrap();
|
||||
let ctx = AnalysisContext {
|
||||
cfg: &cfg,
|
||||
|
|
@ -45,6 +47,8 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
|
|||
func_summaries: &summaries,
|
||||
global_summaries: None,
|
||||
taint_findings: &[],
|
||||
analysis_rules: None,
|
||||
taint_active: true,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -59,7 +63,7 @@ fn parse_and_run_all_with_taint(
|
|||
let mut parser = tree_sitter::Parser::new();
|
||||
parser.set_language(&ts_lang).unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
|
||||
let lang = Lang::from_slug(lang_str).unwrap();
|
||||
let ctx = AnalysisContext {
|
||||
cfg: &cfg,
|
||||
|
|
@ -70,6 +74,8 @@ fn parse_and_run_all_with_taint(
|
|||
func_summaries: &summaries,
|
||||
global_summaries: None,
|
||||
taint_findings,
|
||||
analysis_rules: None,
|
||||
taint_active: true,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -144,7 +150,7 @@ fn unreachable_detects_orphaned_nodes() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
// All nodes in linear code should be reachable
|
||||
let reachable = dominators::reachable_set(&cfg, entry);
|
||||
|
|
@ -469,7 +475,7 @@ fn reachable_set_contains_all_connected_nodes() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
let reachable = dominators::reachable_set(&cfg, entry);
|
||||
|
||||
|
|
@ -493,7 +499,7 @@ fn find_exit_node_exists() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
let exit = dominators::find_exit_node(&cfg);
|
||||
assert!(exit.is_some(), "Should find an exit node");
|
||||
|
|
@ -512,7 +518,7 @@ fn shortest_distance_basic() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
let exit = dominators::find_exit_node(&cfg).unwrap();
|
||||
let dist = dominators::shortest_distance(&cfg, entry, exit);
|
||||
|
|
@ -656,7 +662,7 @@ fn taint_and_unguarded_sink_deduped() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let _lang = Lang::from_slug("rust").unwrap();
|
||||
|
||||
// Find a sink node to create a synthetic taint finding
|
||||
|
|
@ -674,6 +680,7 @@ fn taint_and_unguarded_sink_deduped() {
|
|||
sink: sink_node,
|
||||
source: entry,
|
||||
path: vec![entry, sink_node],
|
||||
source_kind: crate::labels::SourceKind::UserInput,
|
||||
}];
|
||||
|
||||
let findings = parse_and_run_all_with_taint(
|
||||
|
|
@ -719,3 +726,831 @@ fn process_star_without_web_params_no_auth_gap() {
|
|||
auth_findings
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Resource leak tests (additional languages) ────────────────────
|
||||
|
||||
/// A Python handle obtained from `open()` that is read but never closed must
/// be reported by `ResourceMisuse` under the `cfg-resource-leak` rule.
#[test]
fn resource_leak_python_open_without_close() {
    let source = br#"
def process():
f = open("data.txt")
data = f.read()
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "python", python);

    // At least one leak finding is enough; the exact count is not pinned.
    let leak_reported = findings.iter().any(|f| f.rule_id == "cfg-resource-leak");
    assert!(
        leak_reported,
        "Should detect open() without close() in Python"
    );
}
|
||||
|
||||
/// A PHP `fopen()` result that is read from but never passed to `fclose()`
/// must be reported by `ResourceMisuse` under `cfg-resource-leak`.
#[test]
fn resource_leak_php_fopen_without_fclose() {
    let source = br#"<?php
function read_file() {
$fp = fopen("data.txt", "r");
$data = fread($fp, 1024);
}
"#;

    let php = Language::from(tree_sitter_php::LANGUAGE_PHP);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "php", php);

    let leak_reported = findings.iter().any(|f| f.rule_id == "cfg-resource-leak");
    assert!(
        leak_reported,
        "Should detect fopen() without fclose() in PHP"
    );
}
|
||||
|
||||
/// A JavaScript descriptor from `fs.openSync()` that is only read from must be
/// reported by `ResourceMisuse` under `cfg-resource-leak`.
#[test]
fn resource_leak_js_open_without_close() {
    let source = br#"
function readFile() {
var fd = fs.openSync("data.txt", "r");
var data = fs.readSync(fd, buf, 0, 100, 0);
}
"#;

    let javascript = Language::from(tree_sitter_javascript::LANGUAGE);
    let findings = parse_and_analyse(
        &resources::ResourceMisuse,
        source,
        "javascript",
        javascript,
    );

    let leak_reported = findings.iter().any(|f| f.rule_id == "cfg-resource-leak");
    assert!(
        leak_reported,
        "Should detect fs.openSync() without fs.closeSync() in JS"
    );
}
|
||||
|
||||
// ─── JS CFG precision tests ────────────────────────────────────────
|
||||
|
||||
/// `throw` must behave as a terminator in the JavaScript CFG: it is lowered to
/// a `Return`-kind node, and a call that follows it in the same block must not
/// be reachable from the entry node.
#[test]
fn js_throw_terminates_block() {
    let src = br#"
function fail() {
throw new Error("fatal");
eval("dead code");
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", None);

    // Count Return-kind nodes whose source text begins with the `throw` keyword.
    let throw_node_count = cfg
        .node_indices()
        .filter(|&idx| {
            let node = &cfg[idx];
            node.kind == crate::cfg::StmtKind::Return
                && node.span.0 > 0
                && src[node.span.0..].starts_with(b"throw")
        })
        .count();
    assert!(
        throw_node_count > 0,
        "throw statement should create a Return-kind node"
    );

    // The CFG builder may drop the dead `eval` call entirely; that is accepted
    // too, because `all` over zero nodes is vacuously true.
    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);
    let every_eval_is_dead = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].callee.as_deref().is_some_and(|c| c == "eval"))
        .all(|idx| !reachable.contains(&idx));
    assert!(every_eval_is_dead, "eval after throw should be unreachable");
}
|
||||
|
||||
/// A callee listed in `LangAnalysisRules::terminators` must end control flow:
/// with `process.exit` configured, the `eval` call after it must not be
/// reachable from the entry node.
#[test]
fn configured_terminator_stops_flow() {
    let src = br#"
function handler() {
process.exit(1);
eval("dangerous");
}
"#;

    let rules = crate::labels::LangAnalysisRules {
        extra_labels: vec![],
        terminators: vec!["process.exit".into()],
        event_handlers: vec![],
    };

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", Some(&rules));

    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);

    // If the builder never emits a node for the dead `eval`, the check passes
    // vacuously, which is an accepted outcome.
    let every_eval_is_dead = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].callee.as_deref().is_some_and(|c| c == "eval"))
        .all(|idx| !reachable.contains(&idx));
    assert!(
        every_eval_is_dead,
        "eval should be unreachable after process.exit terminator"
    );
}
|
||||
|
||||
// ─── Href classification tests ─────────────────────────────────────
|
||||
|
||||
/// Assigning to `location.href` must yield at least one CFG node labelled as a
/// `DataLabel::Sink`.
#[test]
fn location_href_assignment_is_sink() {
    let src = br#"
function redirect(url) {
location.href = url;
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);

    let mut sink_seen = false;
    for idx in cfg.node_indices() {
        if let Some(crate::labels::DataLabel::Sink(_)) = cfg[idx].label {
            sink_seen = true;
            break;
        }
    }
    assert!(sink_seen, "location.href = url should produce a Sink node");
}
|
||||
|
||||
/// Assigning a literal to `el.href` must not label any CFG node as a
/// `DataLabel::Sink`.
#[test]
fn a_href_assignment_is_not_sink() {
    let src = br#"
function setLink(el) {
el.href = "/about";
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);

    let mut sink_seen = false;
    for idx in cfg.node_indices() {
        if let Some(crate::labels::DataLabel::Sink(_)) = cfg[idx].label {
            sink_seen = true;
            break;
        }
    }
    assert!(
        !sink_seen,
        "el.href = '/about' should NOT produce a Sink node"
    );
}
|
||||
|
||||
// ─── Config sanitizer tests ────────────────────────────────────────
|
||||
|
||||
/// A sanitizer supplied through `LangAnalysisRules::extra_labels` must be
/// honoured: when `escapeHtml` is configured as an `HTML_ESCAPE` sanitizer, an
/// `innerHTML` assignment of its result must not raise `cfg-unguarded-sink`.
#[test]
fn config_sanitizer_suppresses_unguarded_sink() {
    let src = br#"
function render(input) {
var safe = escapeHtml(input);
document.body.innerHTML = safe;
}
"#;
    let lang_str = "javascript";

    // The same rule set is given to both the CFG builder and the analysis context.
    let escape_rule = crate::labels::RuntimeLabelRule {
        matchers: vec!["escapeHtml".into()],
        label: crate::labels::DataLabel::Sanitizer(crate::labels::Cap::HTML_ESCAPE),
    };
    let rules = crate::labels::LangAnalysisRules {
        extra_labels: vec![escape_rule],
        terminators: vec![],
        event_handlers: vec![],
    };

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    // NOTE(review): the path is "test.rs" although the snippet is JavaScript;
    // the other JS tests use "test.js" — confirm this is intentional.
    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", Some(&rules));
    let ctx = AnalysisContext {
        cfg: &cfg,
        entry,
        lang: Lang::from_slug(lang_str).unwrap(),
        file_path: "test.rs",
        source_bytes: src,
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
        analysis_rules: Some(&rules),
        taint_active: true,
    };
    let findings = run_all(&ctx);

    let unguarded: Vec<_> = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect();
    assert!(
        unguarded.is_empty(),
        "escapeHtml config sanitizer should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
|
||||
|
||||
// ─── Python precision tests ────────────────────────────────────────
|
||||
|
||||
/// `subprocess.run` called with a list of string literals must not be
/// reported as `cfg-unguarded-sink`.
#[test]
fn python_constant_subprocess_no_finding() {
    let source = br#"
import subprocess

def build():
subprocess.run(["make", "clean"])
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_run_all(source, "python", python);

    let unguarded = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "subprocess.run with constant list args should not be flagged; got {:?}",
        unguarded
    );
}
|
||||
|
||||
/// A second constant-argument case: `subprocess.run(["git", "status"])` must
/// not be reported as `cfg-unguarded-sink`.
#[test]
fn python_constant_git_status_no_finding() {
    let source = br#"
import subprocess

def check():
subprocess.run(["git", "status"])
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_run_all(source, "python", python);

    let unguarded = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "subprocess.run with constant git args should not be flagged; got {:?}",
        unguarded
    );
}
|
||||
|
||||
/// A value read from `sys.argv` and passed to `os.system` must yield a
/// `cfg-unguarded-sink` finding with `Severity::High`.
#[test]
fn python_tainted_os_system_produces_finding() {
    let source = br#"
import sys
import os

def run():
cmd = sys.argv[1]
os.system(cmd)
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_run_all(source, "python", python);

    // Both the rule id and the severity have to match on the same finding.
    let high_sink_reported = findings.iter().any(|f| {
        f.rule_id == "cfg-unguarded-sink" && f.severity == crate::patterns::Severity::High
    });
    assert!(
        high_sink_reported,
        "Source-derived os.system should produce a HIGH finding"
    );
}
|
||||
|
||||
// ─── C++ precision tests ───────────────────────────────────────────
|
||||
|
||||
/// Streaming a literal to `std::cout` in C++ must not be reported as
/// `cfg-unguarded-sink`.
#[test]
fn cpp_cout_not_a_sink() {
    let source = br#"
#include <iostream>
int main() {
std::cout << "hello" << std::endl;
return 0;
}
"#;

    let cpp = Language::from(tree_sitter_cpp::LANGUAGE);
    let findings = parse_and_run_all(source, "cpp", cpp);

    let sink_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        sink_findings.is_empty(),
        "std::cout should not produce an unguarded-sink finding; got {:?}",
        sink_findings
    );
}
|
||||
|
||||
/// `printf` with only a literal argument must not be reported as
/// `cfg-unguarded-sink`.
///
/// Despite the `cpp_` prefix in the name, the snippet is parsed with the C
/// grammar and the `"c"` language slug.
#[test]
fn cpp_printf_constant_no_finding() {
    let source = br#"
#include <stdio.h>
int main() {
printf("hello\n");
return 0;
}
"#;

    let c_lang = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_run_all(source, "c", c_lang);

    let unguarded = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "printf with constant args should be suppressed; got {:?}",
        unguarded
    );
}
|
||||
|
||||
/// A `getenv` result passed straight to `system` must be reported as
/// `cfg-unguarded-sink`.
///
/// Despite the `cpp_` prefix in the name, the snippet is parsed with the C
/// grammar and the `"c"` language slug.
#[test]
fn cpp_system_with_getenv_produces_finding() {
    let source = br#"
#include <stdlib.h>
int main() {
char* input = getenv("USER_CMD");
system(input);
return 0;
}
"#;

    let c_lang = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_run_all(source, "c", c_lang);

    let sink_reported = findings.iter().any(|f| f.rule_id == "cfg-unguarded-sink");
    assert!(
        sink_reported,
        "system(getenv(...)) should produce an unguarded-sink finding"
    );
}
|
||||
|
||||
// ─── Unreachable + unguarded dedup test ─────────────────────────────
|
||||
|
||||
/// A sink located after an unconditional `return` must not be double-reported:
/// no `cfg-unguarded-sink` finding may share a span with a
/// `cfg-unreachable-sink` finding.
#[test]
fn unreachable_sink_suppresses_unguarded() {
    let source = br#"
fn main() {
return;
std::process::Command::new("sh").arg("x").status().unwrap();
}
"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let findings = parse_and_run_all(source, "rust", rust);

    let unreachable = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unreachable-sink")
        .collect::<Vec<_>>();

    // Keep only unguarded-sink findings that coincide with an unreachable one.
    let mut unguarded_at_same_span = Vec::new();
    for finding in findings.iter() {
        if finding.rule_id == "cfg-unguarded-sink"
            && unreachable.iter().any(|u| u.span == finding.span)
        {
            unguarded_at_same_span.push(finding);
        }
    }

    assert!(
        unguarded_at_same_span.is_empty(),
        "cfg-unguarded-sink should be suppressed when cfg-unreachable-sink fires on same span; got {:?}",
        unguarded_at_same_span
    );
}
|
||||
|
||||
// ─── Fix 3: Wrapper resource names (curlx_fopen/curlx_fclose) ──────
|
||||
|
||||
/// A `curlx_fopen` / `curlx_fclose` pair must be treated as a matched
/// acquire/release and produce no `cfg-resource-leak` finding.
#[test]
fn curlx_fopen_with_curlx_fclose_no_leak() {
    let source = br#"
void process() {
FILE *fp = curlx_fopen("file.txt", "r");
curlx_fclose(fp);
}
"#;

    let c_lang = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "c", c_lang);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "curlx_fopen + curlx_fclose should not produce a resource leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 4: freopen exclusion ───────────────────────────────────────
|
||||
|
||||
/// A bare `freopen` call must not count as a resource acquisition, so it must
/// produce no `cfg-resource-leak` finding.
#[test]
fn freopen_not_treated_as_acquire() {
    let source = br#"
void redirect_stderr() {
freopen("/dev/null", "w", stderr);
}
"#;

    let c_lang = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "c", c_lang);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "freopen should not produce a resource leak finding; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 5: Struct field ownership transfer ─────────────────────────
|
||||
|
||||
/// Storing an opened `FILE *` into a field of a struct received through a
/// pointer must suppress `cfg-resource-leak` for that handle.
#[test]
fn struct_field_ownership_transfer_no_leak() {
    let source = br#"
void open_stream(struct session *s) {
FILE *fp = fopen("data.txt", "r");
s->stream = fp;
s->fopened = 1;
}
"#;

    let c_lang = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "c", c_lang);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "Struct field ownership transfer should suppress resource leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 6: Linked-list / global insertion ──────────────────────────
|
||||
|
||||
/// A `malloc`'d node that is linked into a list owned by a struct passed by
/// pointer must not be reported as `cfg-resource-leak`.
#[test]
fn linked_list_insertion_no_leak() {
    let source = br#"
void add_var(struct config *cfg, const char *name) {
struct var *p = malloc(sizeof(struct var));
p->next = cfg->variables;
cfg->variables = p;
}
"#;

    let c_lang = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "c", c_lang);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "Linked-list insertion should suppress resource leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 2: Preproc dangling-else CFG recovery ─────────────────────
|
||||
|
||||
/// An `if`/`else` whose `else` keyword sits inside an `#ifdef` block while its
/// body sits outside must not disconnect later statements: every CFG node has
/// to stay reachable from the entry node.
#[test]
fn preproc_ifdef_does_not_orphan_subsequent_code() {
    let src = br#"
void process() {
int x = 1;
#ifdef _WIN32
if (x) {
x = 2;
} else
#endif
{
x = 3;
}
free(x);
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_c::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "c", "test.c", None);

    // Nodes in the graph minus nodes reachable from entry = orphaned nodes.
    let total_nodes = cfg.node_count();
    let reachable_nodes = dominators::reachable_set(&cfg, entry).len();
    let unreachable_count = total_nodes - reachable_nodes;
    assert!(
        unreachable_count == 0,
        "Expected all nodes reachable after preproc block, but {} nodes are unreachable",
        unreachable_count
    );
}
|
||||
|
||||
// ─── Fix 1: Break in loop keeps post-loop code reachable ────────────
|
||||
|
||||
/// A `break` inside `while(1)` must connect to the code after the loop: every
/// CFG node, including the trailing `free(x)`, has to be reachable from entry.
#[test]
fn break_in_loop_post_loop_reachable() {
    let src = br#"
void process() {
int x = 0;
while(1) {
if(x) break;
x = x + 1;
}
free(x);
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_c::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "c", "test.c", None);

    // Nodes in the graph minus nodes reachable from entry = orphaned nodes.
    let total_nodes = cfg.node_count();
    let reachable_nodes = dominators::reachable_set(&cfg, entry).len();
    let unreachable_count = total_nodes - reachable_nodes;
    assert!(
        unreachable_count == 0,
        "Expected all nodes reachable after break in loop, but {} nodes are unreachable",
        unreachable_count
    );
}
|
||||
|
||||
// ─── PART 2A: One-hop constant binding trace ────────────────────────
|
||||
|
||||
/// A variable bound to a string literal one statement before being used in
/// the `subprocess.run` argument list must still count as constant, so no
/// `cfg-unguarded-sink` finding is expected.
#[test]
fn python_one_hop_constant_binding_no_finding() {
    let source = br#"
import subprocess

def check():
cmd = "git"
subprocess.run([cmd, "status"])
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_run_all(source, "python", python);

    let unguarded = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "One-hop constant binding should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
|
||||
|
||||
// ─── PART 2B: Exec-path guard rules ─────────────────────────────────
|
||||
|
||||
/// A `resolve_binary(&bin)` call placed before `bin` is handed to a
/// `Command` as an argument must act as a guard, so `UnguardedSink` reports
/// no `cfg-unguarded-sink` finding.
#[test]
fn exec_path_guard_suppresses_unguarded_sink() {
    let source = br#"
use std::process::Command;
fn main() {
let bin = std::env::var("BIN").unwrap();
resolve_binary(&bin);
Command::new("sh").arg(&bin).status().unwrap();
}"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let findings = parse_and_analyse(&guards::UnguardedSink, source, "rust", rust);

    let unguarded = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "resolve_binary guard should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
|
||||
|
||||
// ─── PART 2C: Evidence-based severity in cfg-only mode ──────────────
|
||||
|
||||
/// With `taint_active` set to `false` and no source-derived evidence,
/// `UnguardedSink` must not emit a `cfg-unguarded-sink` finding at Medium or
/// High severity.
///
/// NOTE(review): the test also passes when no finding is produced at all —
/// confirm whether a Low finding should be asserted explicitly.
#[test]
fn cfg_only_no_taint_produces_low_severity() {
    let src = br#"
use std::process::Command;
fn process_data() {
let x = compute_something();
Command::new("sh").arg(&x).status().unwrap();
}"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);

    let ctx = AnalysisContext {
        cfg: &cfg,
        entry,
        lang: Lang::from_slug("rust").unwrap(),
        file_path: "test.rs",
        source_bytes: src,
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
        analysis_rules: None,
        // cfg-only mode: the taint engine is declared inactive.
        taint_active: false,
    };
    let findings = guards::UnguardedSink.run(&ctx);

    let mut medium_or_high = Vec::new();
    for finding in findings.iter() {
        let elevated = finding.severity == crate::patterns::Severity::Medium
            || finding.severity == crate::patterns::Severity::High;
        if finding.rule_id == "cfg-unguarded-sink" && elevated {
            medium_or_high.push(finding);
        }
    }
    assert!(
        medium_or_high.is_empty(),
        "cfg-only mode without taint should produce LOW severity, not MEDIUM/HIGH; got {:?}",
        medium_or_high
    );
}
|
||||
|
||||
// ─── PART 4B: FileResponse ownership transfer ──────────────────────
|
||||
|
||||
/// Returning an opened file wrapped in `FileResponse(...)` must not be
/// reported as `cfg-resource-leak`.
#[test]
fn file_response_ownership_transfer_no_leak() {
    let source = br#"
def serve_file():
f = open("report.pdf", "rb")
return FileResponse(f)
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "python", python);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "FileResponse should suppress cfg-resource-leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── PART 4C: Lock-not-released refinement ──────────────────────────
|
||||
|
||||
/// Constructing a `threading.Lock()` without ever acquiring it must not raise
/// `cfg-lock-not-released`.
#[test]
fn python_lock_constructor_only_no_finding() {
    let source = br#"
import threading

def setup():
lock = threading.Lock()
do_work()
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "python", python);

    let lock_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-lock-not-released")
        .collect::<Vec<_>>();
    assert!(
        lock_findings.is_empty(),
        "Lock constructor without acquire should not produce cfg-lock-not-released; got {:?}",
        lock_findings
    );
}
|
||||
|
||||
// ─── PART 4A: signal.connect exclusion ──────────────────────────────
|
||||
|
||||
/// `signal.connect(handler)` must not be mistaken for a resource acquisition,
/// so no `cfg-resource-leak` finding is expected.
#[test]
fn python_signal_connect_not_treated_as_db_acquire() {
    let source = br#"
def setup():
signal.connect(handler)
do_work()
"#;

    let python = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, source, "python", python);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "signal.connect should not be treated as db acquire; got {:?}",
        leak_findings
    );
}
|
||||
|
|
|
|||
|
|
@ -3,9 +3,40 @@ use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
|
|||
use crate::cfg::StmtKind;
|
||||
use crate::labels::DataLabel;
|
||||
use crate::patterns::Severity;
|
||||
use std::collections::HashSet;
|
||||
|
||||
pub struct UnreachableCode;
|
||||
|
||||
/// Collect function names that appear as arguments to configured event handler calls.
|
||||
fn event_handler_callbacks(ctx: &AnalysisContext) -> HashSet<String> {
|
||||
let mut callbacks = HashSet::new();
|
||||
let handlers = match ctx.analysis_rules {
|
||||
Some(rules) if !rules.event_handlers.is_empty() => &rules.event_handlers,
|
||||
_ => return callbacks,
|
||||
};
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
let info = &ctx.cfg[idx];
|
||||
if info.kind != StmtKind::Call {
|
||||
continue;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
let is_handler = handlers
|
||||
.iter()
|
||||
.any(|h| callee_lower.ends_with(&h.to_ascii_lowercase()));
|
||||
if is_handler {
|
||||
// The callback function is typically used within the call — any function
|
||||
// that appears as `uses` of this call node is a potential callback.
|
||||
for u in &info.uses {
|
||||
callbacks.insert(u.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
callbacks
|
||||
}
|
||||
|
||||
impl CfgAnalysis for UnreachableCode {
|
||||
fn name(&self) -> &'static str {
|
||||
"unreachable-code"
|
||||
|
|
@ -13,6 +44,7 @@ impl CfgAnalysis for UnreachableCode {
|
|||
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
||||
let reachable = dominators::reachable_set(ctx.cfg, ctx.entry);
|
||||
let handler_callbacks = event_handler_callbacks(ctx);
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
|
|
@ -27,6 +59,13 @@ impl CfgAnalysis for UnreachableCode {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Suppress findings for nodes inside event handler callbacks
|
||||
if let Some(func_name) = &info.enclosing_func
|
||||
&& handler_callbacks.contains(func_name)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let (rule_id, title, severity) = match info.label {
|
||||
Some(DataLabel::Sanitizer(_)) => (
|
||||
"cfg-unreachable-sanitizer",
|
||||
|
|
@ -43,7 +82,9 @@ impl CfgAnalysis for UnreachableCode {
|
|||
),
|
||||
_ => {
|
||||
// Check if it's a guard/auth call
|
||||
if super::is_guard_call(info, ctx.lang) || super::is_auth_call(info, ctx.lang) {
|
||||
if super::is_guard_call(info, ctx.lang, ctx.analysis_rules)
|
||||
|| super::is_auth_call(info, ctx.lang)
|
||||
{
|
||||
(
|
||||
"cfg-unreachable-guard",
|
||||
"Unreachable guard/auth check",
|
||||
|
|
|
|||
62
src/cli.rs
62
src/cli.rs
|
|
@ -9,6 +9,14 @@ pub struct Cli {
|
|||
pub(crate) command: Commands,
|
||||
}
|
||||
|
||||
impl Commands {
|
||||
/// Whether this command produces structured (machine-readable) output on
|
||||
/// stdout, meaning human status messages must be suppressed entirely.
|
||||
pub fn is_structured_output(&self) -> bool {
|
||||
matches!(self, Commands::Scan { format, .. } if format == "json" || format == "sarif")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
pub enum Commands {
|
||||
/// Scan project for vulnerabilities
|
||||
|
|
@ -25,8 +33,8 @@ pub enum Commands {
|
|||
#[arg(long)]
|
||||
rebuild_index: bool,
|
||||
|
||||
/// Output format
|
||||
#[arg(short, long, value_enum, default_value = "")]
|
||||
/// Output format (console, json, sarif)
|
||||
#[arg(short, long, default_value = "")]
|
||||
format: String,
|
||||
|
||||
/// Show only high severity issues
|
||||
|
|
@ -41,6 +49,11 @@ pub enum Commands {
|
|||
|
||||
#[arg(long)]
|
||||
all_targets: bool,
|
||||
|
||||
/// Include findings from test/vendor/build paths at original severity
|
||||
/// (by default these are downgraded)
|
||||
#[arg(long)]
|
||||
include_nonprod: bool,
|
||||
},
|
||||
|
||||
/// Manage project indexes
|
||||
|
|
@ -65,6 +78,51 @@ pub enum Commands {
|
|||
#[arg(long)]
|
||||
all: bool,
|
||||
},
|
||||
|
||||
/// Manage analysis configuration
|
||||
Config {
|
||||
#[command(subcommand)]
|
||||
action: ConfigAction,
|
||||
},
|
||||
}
|
||||
|
||||
// Actions of the `config` subcommand; carried by `Commands::Config { action }`.
//
// NOTE(review): the `///` comments below are attached to a derive macro and are
// presumably surfaced as CLI help text — confirm before rewording them.
#[derive(Subcommand)]
pub enum ConfigAction {
    /// Print effective merged configuration as TOML
    Show,

    /// Print configuration directory path
    Path,

    /// Add a label rule to nyx.local
    AddRule {
        /// Language slug (e.g. javascript, rust, python)
        #[arg(long)]
        lang: String,

        /// Function or property name to match
        #[arg(long)]
        matcher: String,

        // Accepted as a free-form string here; `commands::config::add_rule`
        // rejects anything other than `source`, `sanitizer` or `sink`.
        /// Rule kind: source, sanitizer, or sink
        #[arg(long)]
        kind: String,

        // Accepted as a free-form string here; `commands::config::add_rule`
        // rejects values that `labels::parse_cap` does not recognise.
        /// Capability: env_var, html_escape, shell_escape, url_encode, json_parse, file_io, or all
        #[arg(long)]
        cap: String,
    },

    /// Add a terminator function to nyx.local
    AddTerminator {
        /// Language slug (e.g. javascript, rust, python)
        #[arg(long)]
        lang: String,

        /// Function name that terminates execution (e.g. process.exit)
        #[arg(long)]
        name: String,
    },
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
|
|
|
|||
213
src/commands/config.rs
Normal file
213
src/commands/config.rs
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
use crate::errors::NyxResult;
|
||||
use crate::utils::config::{AnalysisRulesConfig, Config, ConfigLabelRule};
|
||||
use console::style;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Show the effective merged configuration as TOML.
|
||||
pub fn show(config: &Config) -> NyxResult<()> {
|
||||
let toml_str =
|
||||
toml::to_string_pretty(config).map_err(|e| format!("Failed to serialize config: {e}"))?;
|
||||
println!("{toml_str}");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Print the configuration directory path.
|
||||
pub fn path(config_dir: &Path) -> NyxResult<()> {
|
||||
println!("{}", config_dir.display());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a label rule to `nyx.local`.
|
||||
pub fn add_rule(
|
||||
config_dir: &Path,
|
||||
lang: &str,
|
||||
matcher: &str,
|
||||
kind: &str,
|
||||
cap: &str,
|
||||
) -> NyxResult<()> {
|
||||
// Validate kind
|
||||
if !["source", "sanitizer", "sink"].contains(&kind) {
|
||||
return Err(
|
||||
format!("Invalid kind '{kind}'. Must be one of: source, sanitizer, sink").into(),
|
||||
);
|
||||
}
|
||||
|
||||
// Validate cap
|
||||
if crate::labels::parse_cap(cap).is_none() {
|
||||
return Err(format!(
|
||||
"Invalid cap '{cap}'. Must be one of: env_var, html_escape, shell_escape, url_encode, json_parse, file_io, all"
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
let local_path = config_dir.join("nyx.local");
|
||||
let mut config: Config = if local_path.exists() {
|
||||
let content = fs::read_to_string(&local_path)?;
|
||||
toml::from_str(&content)?
|
||||
} else {
|
||||
Config::default()
|
||||
};
|
||||
|
||||
let lang_cfg = config
|
||||
.analysis
|
||||
.languages
|
||||
.entry(lang.to_string())
|
||||
.or_default();
|
||||
|
||||
let new_rule = ConfigLabelRule {
|
||||
matchers: vec![matcher.to_string()],
|
||||
kind: kind.to_string(),
|
||||
cap: cap.to_string(),
|
||||
};
|
||||
|
||||
// Dedup
|
||||
if !lang_cfg.rules.contains(&new_rule) {
|
||||
lang_cfg.rules.push(new_rule);
|
||||
}
|
||||
|
||||
write_local_config(&local_path, &config)?;
|
||||
|
||||
println!(
|
||||
"{}: Added {} rule for `{}` ({}) in {}",
|
||||
style("ok").green().bold(),
|
||||
kind,
|
||||
matcher,
|
||||
cap,
|
||||
lang
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a terminator to `nyx.local`.
|
||||
pub fn add_terminator(config_dir: &Path, lang: &str, name: &str) -> NyxResult<()> {
|
||||
let local_path = config_dir.join("nyx.local");
|
||||
let mut config: Config = if local_path.exists() {
|
||||
let content = fs::read_to_string(&local_path)?;
|
||||
toml::from_str(&content)?
|
||||
} else {
|
||||
Config::default()
|
||||
};
|
||||
|
||||
let lang_cfg = config
|
||||
.analysis
|
||||
.languages
|
||||
.entry(lang.to_string())
|
||||
.or_default();
|
||||
|
||||
if !lang_cfg.terminators.contains(&name.to_string()) {
|
||||
lang_cfg.terminators.push(name.to_string());
|
||||
}
|
||||
|
||||
write_local_config(&local_path, &config)?;
|
||||
|
||||
println!(
|
||||
"{}: Added terminator `{}` for {}",
|
||||
style("ok").green().bold(),
|
||||
name,
|
||||
lang
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write only the non-default portions to nyx.local.
|
||||
fn write_local_config(path: &Path, config: &Config) -> NyxResult<()> {
|
||||
// Only write the analysis section to nyx.local to keep it minimal.
|
||||
// Other settings keep their defaults unless previously customized.
|
||||
let mut local = Config {
|
||||
analysis: config.analysis.clone(),
|
||||
..Config::default()
|
||||
};
|
||||
|
||||
// Strip empty language entries
|
||||
local.analysis.languages.retain(|_, v| {
|
||||
!v.rules.is_empty() || !v.terminators.is_empty() || !v.event_handlers.is_empty()
|
||||
});
|
||||
|
||||
// If no analysis rules, only write the analysis section
|
||||
if local.analysis.languages.is_empty() {
|
||||
local.analysis = AnalysisRulesConfig::default();
|
||||
}
|
||||
|
||||
let toml_str =
|
||||
toml::to_string_pretty(&local).map_err(|e| format!("Failed to serialize config: {e}"))?;
|
||||
fs::write(path, toml_str)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Read back and parse the `nyx.local` file written into `dir`.
    fn read_local(dir: &Path) -> Config {
        let raw = fs::read_to_string(dir.join("nyx.local")).unwrap();
        toml::from_str(&raw).unwrap()
    }

    /// Register the `escapeHtml` HTML sanitizer for JavaScript in `dir`.
    fn add_escape_html(dir: &Path) {
        add_rule(dir, "javascript", "escapeHtml", "sanitizer", "html_escape").unwrap();
    }

    #[test]
    fn add_rule_writes_valid_toml() {
        let dir = tempfile::tempdir().unwrap();
        add_escape_html(dir.path());

        let config = read_local(dir.path());
        let js = config.analysis.languages.get("javascript").unwrap();
        assert_eq!(js.rules.len(), 1);

        let rule = &js.rules[0];
        assert_eq!(rule.matchers, vec!["escapeHtml"]);
        assert_eq!(rule.kind, "sanitizer");
        assert_eq!(rule.cap, "html_escape");
    }

    #[test]
    fn add_rule_deduplicates() {
        let dir = tempfile::tempdir().unwrap();
        // Adding the identical rule twice must leave exactly one entry.
        add_escape_html(dir.path());
        add_escape_html(dir.path());

        let config = read_local(dir.path());
        let js = config.analysis.languages.get("javascript").unwrap();
        assert_eq!(js.rules.len(), 1);
    }

    #[test]
    fn add_terminator_works() {
        let dir = tempfile::tempdir().unwrap();
        add_terminator(dir.path(), "javascript", "process.exit").unwrap();

        let config = read_local(dir.path());
        let js = config.analysis.languages.get("javascript").unwrap();
        assert_eq!(js.terminators, vec!["process.exit"]);
    }

    #[test]
    fn add_rule_rejects_invalid_kind() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = add_rule(dir.path(), "javascript", "foo", "invalid_kind", "all");
        assert!(outcome.is_err());
    }

    #[test]
    fn add_rule_rejects_invalid_cap() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = add_rule(dir.path(), "javascript", "foo", "sanitizer", "invalid_cap");
        assert!(outcome.is_err());
    }
}
|
||||
|
|
@ -5,10 +5,10 @@ use crate::patterns::Severity;
|
|||
use crate::utils::Config;
|
||||
use crate::utils::project::get_project_info;
|
||||
use crate::walk::spawn_file_walker;
|
||||
use blake3;
|
||||
use bytesize::ByteSize;
|
||||
use chrono::{DateTime, Local};
|
||||
use console::style;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use rayon::prelude::*;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -25,7 +25,13 @@ pub fn handle(
|
|||
let (project_name, db_path) = get_project_info(&build_path, database_dir)?;
|
||||
|
||||
if force || !db_path.exists() {
|
||||
build_index(&project_name, &build_path, &db_path, config)?;
|
||||
build_index(
|
||||
&project_name,
|
||||
&build_path,
|
||||
&db_path,
|
||||
config,
|
||||
!config.output.quiet,
|
||||
)?;
|
||||
println!(
|
||||
"✔ {} {}",
|
||||
style("Index built:").green(),
|
||||
|
|
@ -84,6 +90,7 @@ pub fn build_index(
|
|||
project_path: &std::path::Path,
|
||||
db_path: &std::path::Path,
|
||||
config: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<()> {
|
||||
tracing::debug!("Building index for: {}", project_name);
|
||||
fs::File::create(db_path)?;
|
||||
|
|
@ -97,10 +104,27 @@ pub fn build_index(
|
|||
tracing::debug!("Cleaned index for: {}", project_name);
|
||||
|
||||
let (rx, handle) = spawn_file_walker(project_path, config);
|
||||
// Drain the channel BEFORE joining — the bounded channel will deadlock
|
||||
// if we join first and the walker blocks on send.
|
||||
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
||||
if let Err(err) = handle.join() {
|
||||
tracing::error!("walker thread panicked: {:#?}", err);
|
||||
}
|
||||
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
||||
|
||||
let pb = if show_progress {
|
||||
let pb = ProgressBar::new(paths.len() as u64);
|
||||
pb.set_style(
|
||||
ProgressStyle::with_template(
|
||||
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
|
||||
)
|
||||
.unwrap()
|
||||
.progress_chars("##-"),
|
||||
);
|
||||
pb.set_message("Indexing files");
|
||||
pb
|
||||
} else {
|
||||
ProgressBar::hidden()
|
||||
};
|
||||
|
||||
paths
|
||||
.into_par_iter()
|
||||
|
|
@ -108,18 +132,15 @@ pub fn build_index(
|
|||
let mut idx = Indexer::from_pool(project_name, &pool)?;
|
||||
|
||||
// Read once, hash once — pass bytes to both rule execution and
|
||||
// summary extraction.
|
||||
// summary extraction. Use pre-computed hash for upsert to avoid
|
||||
// a redundant file read inside upsert_file.
|
||||
let bytes = std::fs::read(&path)?;
|
||||
let hash = {
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
hasher.update(&bytes);
|
||||
hasher.finalize().as_bytes().to_vec()
|
||||
};
|
||||
let hash = Indexer::digest_bytes(&bytes);
|
||||
|
||||
// Run AST-only rules (no taint yet — summaries come later in scan)
|
||||
let issues =
|
||||
crate::commands::scan::run_rules_on_bytes(&bytes, &path, config, None, None)?;
|
||||
let file_id = idx.upsert_file(&path)?;
|
||||
let file_id = idx.upsert_file_with_hash(&path, &hash)?;
|
||||
|
||||
let rows: Vec<IssueRow> = issues
|
||||
.iter()
|
||||
|
|
@ -144,8 +165,10 @@ pub fn build_index(
|
|||
idx.replace_summaries_for_file(&path, &hash, &sums)?;
|
||||
}
|
||||
|
||||
pb.inc(1);
|
||||
Ok(())
|
||||
})?;
|
||||
pb.finish_and_clear();
|
||||
|
||||
{
|
||||
let idx = Indexer::from_pool(project_name, &pool)?;
|
||||
|
|
@ -170,7 +193,7 @@ fn build_index_creates_db_and_registers_files() {
|
|||
|
||||
let db_path = td.path().join("proj.sqlite");
|
||||
|
||||
build_index("proj", &project_dir, &db_path, &cfg).expect("index build should succeed");
|
||||
build_index("proj", &project_dir, &db_path, &cfg, false).expect("index build should succeed");
|
||||
|
||||
// ── Assert ────────────────────────────────────────────────────────────────
|
||||
assert!(db_path.is_file(), "SQLite file must exist");
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
pub mod clean;
|
||||
pub mod config;
|
||||
pub mod index;
|
||||
pub mod list;
|
||||
pub mod scan;
|
||||
|
|
@ -12,6 +13,7 @@ use std::path::Path;
|
|||
pub fn handle_command(
|
||||
command: Commands,
|
||||
database_dir: &Path,
|
||||
config_dir: &Path,
|
||||
config: &mut Config,
|
||||
) -> NyxResult<()> {
|
||||
match command {
|
||||
|
|
@ -24,6 +26,7 @@ pub fn handle_command(
|
|||
ast_only,
|
||||
cfg_only,
|
||||
all_targets,
|
||||
include_nonprod,
|
||||
} => {
|
||||
if high_only {
|
||||
config.scanner.min_severity = Severity::High
|
||||
|
|
@ -41,10 +44,37 @@ pub fn handle_command(
|
|||
config.scanner.mode = AnalysisMode::Full
|
||||
};
|
||||
|
||||
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)
|
||||
if include_nonprod {
|
||||
config.scanner.include_nonprod = true
|
||||
};
|
||||
|
||||
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)?;
|
||||
}
|
||||
Commands::Index { action } => {
|
||||
index::handle(action, database_dir, config)?;
|
||||
}
|
||||
Commands::List { verbose } => {
|
||||
list::handle(verbose, database_dir)?;
|
||||
}
|
||||
Commands::Clean { project, all } => {
|
||||
clean::handle(project, all, database_dir)?;
|
||||
}
|
||||
Commands::Config { action } => {
|
||||
use crate::cli::ConfigAction;
|
||||
match action {
|
||||
ConfigAction::Show => self::config::show(config)?,
|
||||
ConfigAction::Path => self::config::path(config_dir)?,
|
||||
ConfigAction::AddRule {
|
||||
lang,
|
||||
matcher,
|
||||
kind,
|
||||
cap,
|
||||
} => self::config::add_rule(config_dir, &lang, &matcher, &kind, &cap)?,
|
||||
ConfigAction::AddTerminator { lang, name } => {
|
||||
self::config::add_terminator(config_dir, &lang, &name)?
|
||||
}
|
||||
}
|
||||
}
|
||||
Commands::Index { action } => index::handle(action, database_dir, config),
|
||||
Commands::List { verbose } => list::handle(verbose, database_dir),
|
||||
Commands::Clean { project, all } => clean::handle(project, all, database_dir),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
pub(crate) use crate::ast::{
|
||||
extract_summaries_from_bytes, extract_summaries_from_file, run_rules_on_bytes,
|
||||
run_rules_on_file,
|
||||
analyse_file_fused, extract_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,
|
||||
};
|
||||
use crate::database::index::{Indexer, IssueRow};
|
||||
use crate::errors::NyxResult;
|
||||
use crate::patterns::Severity;
|
||||
use crate::summary::{self, FuncSummary, GlobalSummaries};
|
||||
use crate::summary::{self, GlobalSummaries};
|
||||
use crate::utils::config::Config;
|
||||
use crate::utils::project::get_project_info;
|
||||
use crate::walk::spawn_file_walker;
|
||||
use console::style;
|
||||
use dashmap::DashMap;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use r2d2::Pool;
|
||||
use r2d2_sqlite::SqliteConnectionManager;
|
||||
use rayon::prelude::*;
|
||||
|
|
@ -18,6 +18,22 @@ use std::collections::BTreeMap;
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
fn make_progress_bar(len: u64, msg: &str, show: bool) -> ProgressBar {
|
||||
if !show {
|
||||
return ProgressBar::hidden();
|
||||
}
|
||||
let pb = ProgressBar::new(len);
|
||||
pb.set_style(
|
||||
ProgressStyle::with_template(
|
||||
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
|
||||
)
|
||||
.unwrap()
|
||||
.progress_chars("##-"),
|
||||
);
|
||||
pb.set_message(msg.to_string());
|
||||
pb
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize)]
|
||||
pub struct Diag {
|
||||
pub path: String,
|
||||
|
|
@ -39,22 +55,37 @@ pub fn handle(
|
|||
let scan_path = Path::new(path).canonicalize()?;
|
||||
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
|
||||
|
||||
println!(
|
||||
"{} {}...\n",
|
||||
style("Checking").green().bold(),
|
||||
&project_name
|
||||
);
|
||||
let suppress_status = config.output.quiet || format == "json" || format == "sarif";
|
||||
if !suppress_status {
|
||||
println!(
|
||||
"{} {}...\n",
|
||||
style("Checking").green().bold(),
|
||||
&project_name
|
||||
);
|
||||
}
|
||||
|
||||
let show_progress = format != "json" && format != "sarif" && !config.output.quiet;
|
||||
|
||||
let diags: Vec<Diag> = if no_index {
|
||||
scan_filesystem(&scan_path, config)?
|
||||
scan_filesystem(&scan_path, config, show_progress)?
|
||||
} else {
|
||||
if rebuild_index || !db_path.exists() {
|
||||
tracing::debug!("Scanning filesystem index filesystem");
|
||||
crate::commands::index::build_index(&project_name, &scan_path, &db_path, config)?;
|
||||
crate::commands::index::build_index(
|
||||
&project_name,
|
||||
&scan_path,
|
||||
&db_path,
|
||||
config,
|
||||
show_progress,
|
||||
)?;
|
||||
}
|
||||
|
||||
let pool = Indexer::init(&db_path)?;
|
||||
scan_with_index_parallel(&project_name, pool, config)?
|
||||
if config.database.vacuum_on_startup {
|
||||
let idx = Indexer::from_pool(&project_name, &pool)?;
|
||||
idx.vacuum()?;
|
||||
}
|
||||
scan_with_index_parallel(&project_name, pool, config, show_progress)?
|
||||
};
|
||||
|
||||
tracing::debug!("Found {:?} issues.", diags.len());
|
||||
|
|
@ -66,6 +97,14 @@ pub fn handle(
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
if format == "sarif" {
|
||||
let sarif = crate::output::build_sarif(&diags, &scan_path);
|
||||
let json = serde_json::to_string_pretty(&sarif)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
|
||||
println!("{json}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if format == "console" || (format.is_empty() && config.output.default_format == "console") {
|
||||
tracing::debug!("Printing to console");
|
||||
let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
|
||||
|
|
@ -77,10 +116,10 @@ pub fn handle(
|
|||
println!("{}", style(path).blue().underlined());
|
||||
for d in issues {
|
||||
println!(
|
||||
" {:>4}:{:<4} [{:}] {:}",
|
||||
" {:>4}:{:<4} {} {}",
|
||||
d.line,
|
||||
d.col,
|
||||
d.severity,
|
||||
d.severity.colored_tag(),
|
||||
style(&d.id).bold()
|
||||
);
|
||||
}
|
||||
|
|
@ -109,55 +148,144 @@ pub fn handle(
|
|||
/// merged cross‑file summaries.
|
||||
///
|
||||
/// AST pattern queries are run during pass 2 (they don't depend on summaries).
|
||||
pub(crate) fn scan_filesystem(root: &Path, cfg: &Config) -> NyxResult<Vec<Diag>> {
|
||||
pub(crate) fn scan_filesystem(
|
||||
root: &Path,
|
||||
cfg: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
// ── Collect file list ────────────────────────────────────────────────
|
||||
let all_paths: Vec<PathBuf> = {
|
||||
let _span = tracing::info_span!("walk_files").entered();
|
||||
let (rx, handle) = spawn_file_walker(root, cfg);
|
||||
// Drain the channel BEFORE joining the walker thread.
|
||||
// The channel is bounded, so joining first would deadlock once
|
||||
// the walker fills it and blocks on send.
|
||||
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
|
||||
if let Err(err) = handle.join() {
|
||||
tracing::error!("walker thread panicked: {:#?}", err);
|
||||
}
|
||||
rx.into_iter().flatten().collect()
|
||||
paths
|
||||
};
|
||||
tracing::info!(file_count = all_paths.len(), "file walk complete");
|
||||
|
||||
// ── Pass 1: extract summaries ────────────────────────────────────────
|
||||
let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
|
||||
|| cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;
|
||||
|
||||
let global_summaries: Option<GlobalSummaries> = if needs_taint {
|
||||
let _span = tracing::info_span!("pass1_summaries", files = all_paths.len()).entered();
|
||||
if !needs_taint {
|
||||
// ── AST-only: single fused pass (no cross-file context needed) ──
|
||||
let _span = tracing::info_span!("ast_only_analysis", files = all_paths.len()).entered();
|
||||
let pb = make_progress_bar(all_paths.len() as u64, "Running analysis", show_progress);
|
||||
|
||||
let collected: Vec<FuncSummary> = all_paths
|
||||
let mut diags: Vec<Diag> = all_paths
|
||||
.par_iter()
|
||||
.flat_map_iter(|path| match extract_summaries_from_file(path, cfg) {
|
||||
Ok(sums) => sums,
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: failed to summarise {}: {e}", path.display());
|
||||
vec![]
|
||||
}
|
||||
.flat_map_iter(|path| {
|
||||
let result = match analyse_file_fused(
|
||||
&std::fs::read(path).unwrap_or_default(),
|
||||
path,
|
||||
cfg,
|
||||
None,
|
||||
Some(root),
|
||||
) {
|
||||
Ok(r) => r.diags,
|
||||
Err(e) => {
|
||||
tracing::warn!("analysis: {}: {e}", path.display());
|
||||
vec![]
|
||||
}
|
||||
};
|
||||
pb.inc(1);
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
pb.finish_and_clear();
|
||||
|
||||
tracing::info!(summaries = collected.len(), "pass 1 complete");
|
||||
let _merge_span = tracing::info_span!("merge_summaries").entered();
|
||||
if let Some(max) = cfg.output.max_results {
|
||||
diags.truncate(max as usize);
|
||||
}
|
||||
return Ok(diags);
|
||||
}
|
||||
|
||||
// ── Taint mode: two-pass with fused pass 1 ──────────────────────────
|
||||
//
|
||||
// Pass 1 (fused): parse + CFG (once!) → extract summaries + run
|
||||
// AST queries + local taint + CFG structural analyses.
|
||||
// Summaries are collected for the cross-file merge.
|
||||
//
|
||||
// Pass 2: re-run full analysis with global summaries injected.
|
||||
// This requires a second parse+CFG, but ONLY for taint-mode files
|
||||
// that need cross-file context. For repos where most functions
|
||||
// don't have unresolved callees, pass 1 results are already correct.
|
||||
|
||||
// ── Pass 1: fused summary extraction + parallel merge ──────────────
|
||||
//
|
||||
// Each rayon thread builds a local `GlobalSummaries` from its chunk,
|
||||
// then the per-thread maps are merged in a binary reduce tree.
|
||||
// This eliminates the serial merge_summaries bottleneck.
|
||||
let global_summaries: GlobalSummaries = {
|
||||
let _span = tracing::info_span!("pass1_fused", files = all_paths.len()).entered();
|
||||
let pb = make_progress_bar(
|
||||
all_paths.len() as u64,
|
||||
"Pass 1: Extracting summaries",
|
||||
show_progress,
|
||||
);
|
||||
let root_str = root.to_string_lossy();
|
||||
Some(summary::merge_summaries(collected, Some(&root_str)))
|
||||
} else {
|
||||
None
|
||||
|
||||
let gs = all_paths
|
||||
.par_iter()
|
||||
.fold(GlobalSummaries::new, |mut local_gs, path| {
|
||||
if let Ok(bytes) = std::fs::read(path) {
|
||||
match analyse_file_fused(&bytes, path, cfg, None, Some(root)) {
|
||||
Ok(r) => {
|
||||
for s in r.summaries {
|
||||
let key = s.func_key(Some(&root_str));
|
||||
local_gs.insert(key, s);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: {}: {e}", path.display());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::warn!("pass 1: cannot read {}", path.display());
|
||||
}
|
||||
pb.inc(1);
|
||||
local_gs
|
||||
})
|
||||
.reduce(GlobalSummaries::new, |mut a, b| {
|
||||
a.merge(b);
|
||||
a
|
||||
});
|
||||
|
||||
pb.finish_and_clear();
|
||||
tracing::info!("pass 1 complete");
|
||||
gs
|
||||
};
|
||||
|
||||
// ── Pass 2: full analysis with cross‑file context ────────────────────
|
||||
// ── Pass 2: re-run with cross-file global summaries ──────────────────
|
||||
let mut diags: Vec<Diag> = {
|
||||
let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
|
||||
let pb = make_progress_bar(
|
||||
all_paths.len() as u64,
|
||||
"Pass 2: Running analysis",
|
||||
show_progress,
|
||||
);
|
||||
|
||||
all_paths
|
||||
let result: Vec<Diag> = all_paths
|
||||
.par_iter()
|
||||
.map(|path| run_rules_on_file(path, cfg, global_summaries.as_ref(), Some(root)))
|
||||
.try_reduce(Vec::new, |mut a, mut b| {
|
||||
a.append(&mut b);
|
||||
Ok(a)
|
||||
})?
|
||||
.flat_map_iter(|path| {
|
||||
let result = match run_rules_on_file(path, cfg, Some(&global_summaries), Some(root))
|
||||
{
|
||||
Ok(d) => d,
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 2: {}: {e}", path.display());
|
||||
vec![]
|
||||
}
|
||||
};
|
||||
pb.inc(1);
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
pb.finish_and_clear();
|
||||
result
|
||||
};
|
||||
tracing::info!(diags = diags.len(), "pass 2 complete");
|
||||
|
||||
|
|
@ -187,6 +315,7 @@ pub fn scan_with_index_parallel(
|
|||
project: &str,
|
||||
pool: Arc<Pool<SqliteConnectionManager>>,
|
||||
cfg: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
let files = {
|
||||
let idx = Indexer::from_pool(project, &pool)?;
|
||||
|
|
@ -199,39 +328,37 @@ pub fn scan_with_index_parallel(
|
|||
// ── Pass 1: ensure summaries are up‑to‑date ──────────────────────────
|
||||
if needs_taint {
|
||||
let _span = tracing::info_span!("pass1_indexed", files = files.len()).entered();
|
||||
let pb = make_progress_bar(
|
||||
files.len() as u64,
|
||||
"Pass 1: Extracting summaries",
|
||||
show_progress,
|
||||
);
|
||||
|
||||
files.par_iter().for_each_init(
|
||||
|| Indexer::from_pool(project, &pool).expect("db pool"),
|
||||
|idx, path| {
|
||||
let needs_scan = idx.should_scan(path).unwrap_or(true);
|
||||
if !needs_scan {
|
||||
return; // summaries in DB are still valid
|
||||
}
|
||||
|
||||
// Read once, hash once, extract summaries from bytes.
|
||||
let bytes = match std::fs::read(path) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: cannot read {}: {e}", path.display());
|
||||
return;
|
||||
}
|
||||
};
|
||||
let hash = {
|
||||
let mut h = blake3::Hasher::new();
|
||||
h.update(&bytes);
|
||||
h.finalize().as_bytes().to_vec()
|
||||
};
|
||||
|
||||
match extract_summaries_from_bytes(&bytes, path, cfg) {
|
||||
Ok(sums) => {
|
||||
idx.replace_summaries_for_file(path, &hash, &sums).ok();
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: {}: {e}", path.display());
|
||||
}
|
||||
// Read once, hash once — use the hash for the change check
|
||||
// to avoid a second file read inside should_scan.
|
||||
if let Ok(bytes) = std::fs::read(path) {
|
||||
let hash = Indexer::digest_bytes(&bytes);
|
||||
let needs_scan = idx.should_scan_with_hash(path, &hash).unwrap_or(true);
|
||||
if needs_scan {
|
||||
match extract_summaries_from_bytes(&bytes, path, cfg) {
|
||||
Ok(sums) => {
|
||||
idx.replace_summaries_for_file(path, &hash, &sums).ok();
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("pass 1: {}: {e}", path.display());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::warn!("pass 1: cannot read {}", path.display());
|
||||
}
|
||||
pb.inc(1);
|
||||
},
|
||||
);
|
||||
pb.finish_and_clear();
|
||||
}
|
||||
|
||||
// ── Load global summaries ────────────────────────────────────────────
|
||||
|
|
@ -247,26 +374,47 @@ pub fn scan_with_index_parallel(
|
|||
|
||||
// ── Pass 2: full analysis ────────────────────────────────────────────
|
||||
let _span = tracing::info_span!("pass2_indexed").entered();
|
||||
let pb2 = make_progress_bar(
|
||||
files.len() as u64,
|
||||
"Pass 2: Running analysis",
|
||||
show_progress,
|
||||
);
|
||||
let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
|
||||
|
||||
files.into_par_iter().for_each_init(
|
||||
|| Indexer::from_pool(project, &pool).expect("db pool"),
|
||||
|idx, path| {
|
||||
// Read file once for both change-detection and analysis.
|
||||
let bytes_opt = std::fs::read(&path).ok();
|
||||
let hash = bytes_opt.as_ref().map(|b| Indexer::digest_bytes(b));
|
||||
|
||||
// In pass 2 we always re-analyse when taint is enabled because
|
||||
// global summaries may have changed even if this file didn't.
|
||||
// For AST-only mode, we can still use the cached issues.
|
||||
let needs_scan = if needs_taint {
|
||||
true // conservative: always re-analyse in taint mode
|
||||
} else {
|
||||
idx.should_scan(&path).unwrap_or(true)
|
||||
match (&hash, &bytes_opt) {
|
||||
(Some(h), _) => idx.should_scan_with_hash(&path, h).unwrap_or(true),
|
||||
_ => true,
|
||||
}
|
||||
};
|
||||
|
||||
let mut diags = if needs_scan {
|
||||
let d = run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
|
||||
.unwrap_or_default();
|
||||
let d = match &bytes_opt {
|
||||
Some(bytes) => {
|
||||
run_rules_on_bytes(bytes, &path, cfg, global_summaries.as_ref(), None)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
None => run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
|
||||
.unwrap_or_default(),
|
||||
};
|
||||
|
||||
// Persist issues + update file record
|
||||
let file_id = idx.upsert_file(&path).unwrap_or_default();
|
||||
// Persist issues + update file record (use pre-computed hash)
|
||||
let file_id = match &hash {
|
||||
Some(h) => idx.upsert_file_with_hash(&path, h).unwrap_or_default(),
|
||||
None => idx.upsert_file(&path).unwrap_or_default(),
|
||||
};
|
||||
idx.replace_issues(
|
||||
file_id,
|
||||
d.iter().map(|d| IssueRow {
|
||||
|
|
@ -298,8 +446,10 @@ pub fn scan_with_index_parallel(
|
|||
.or_default()
|
||||
.append(&mut diags);
|
||||
}
|
||||
pb2.inc(1);
|
||||
},
|
||||
);
|
||||
pb2.finish_and_clear();
|
||||
|
||||
let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();
|
||||
|
||||
|
|
@ -323,7 +473,8 @@ fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
|
|||
std::fs::write(project_dir.join("foo.txt"), "abc").unwrap();
|
||||
|
||||
let (project_name, db_path) = get_project_info(&project_dir, td.path()).unwrap();
|
||||
crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg).unwrap();
|
||||
crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg, false)
|
||||
.unwrap();
|
||||
|
||||
let pool = Indexer::init(&db_path).unwrap();
|
||||
|
||||
|
|
@ -336,7 +487,7 @@ fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
|
|||
1
|
||||
);
|
||||
|
||||
let diags = scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg)
|
||||
let diags = scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg, false)
|
||||
.expect("scan should succeed");
|
||||
|
||||
assert!(diags.is_empty());
|
||||
|
|
|
|||
107
src/database.rs
107
src/database.rs
|
|
@ -68,9 +68,13 @@ pub mod index {
|
|||
impl Indexer {
|
||||
pub fn init(database_path: &Path) -> NyxResult<Arc<Pool<SqliteConnectionManager>>> {
|
||||
let _span = tracing::info_span!("db_init", path = %database_path.display()).entered();
|
||||
// NO_MUTEX is safe because r2d2 ensures each pooled connection
|
||||
// is only ever used by one thread at a time. Combined with WAL
|
||||
// mode this allows concurrent readers + a single writer without
|
||||
// the global serialization that FULL_MUTEX causes.
|
||||
let flags = OpenFlags::SQLITE_OPEN_READ_WRITE
|
||||
| OpenFlags::SQLITE_OPEN_CREATE
|
||||
| OpenFlags::SQLITE_OPEN_FULL_MUTEX;
|
||||
| OpenFlags::SQLITE_OPEN_NO_MUTEX;
|
||||
let manager = SqliteConnectionManager::file(database_path).with_flags(flags);
|
||||
let pool = Arc::new(Pool::new(manager)?);
|
||||
|
||||
|
|
@ -132,10 +136,13 @@ pub mod index {
|
|||
}
|
||||
|
||||
/// Return true when the file *content* or *mtime* changed since the last scan.
|
||||
///
|
||||
/// Short-circuits on mtime: if the stored mtime matches the
|
||||
/// filesystem mtime, the file is assumed unchanged (skip hash).
|
||||
#[allow(dead_code)] // used in tests and by should_scan_with_hash callers may fall back
|
||||
pub fn should_scan(&self, path: &Path) -> NyxResult<bool> {
|
||||
let meta = fs::metadata(path)?;
|
||||
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let digest = Self::digest_file(path)?;
|
||||
|
||||
let row: Option<(Vec<u8>, i64)> = self
|
||||
.conn
|
||||
|
|
@ -147,18 +154,56 @@ pub mod index {
|
|||
.optional()?;
|
||||
|
||||
Ok(match row {
|
||||
Some((stored_hash, stored_mtime)) => stored_hash != digest || stored_mtime != mtime,
|
||||
Some((stored_hash, stored_mtime)) => {
|
||||
if stored_mtime != mtime {
|
||||
// mtime changed — must re-scan
|
||||
true
|
||||
} else {
|
||||
// mtime matches — compare hash only if cheap
|
||||
// (the caller already read the file and can use
|
||||
// should_scan_with_hash instead for full accuracy)
|
||||
let digest = Self::digest_file(path)?;
|
||||
stored_hash != digest
|
||||
}
|
||||
}
|
||||
None => true,
|
||||
})
|
||||
}
|
||||
|
||||
/// Like [`should_scan`] but accepts a pre-computed hash to avoid
|
||||
/// redundant file reads.
|
||||
pub fn should_scan_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<bool> {
|
||||
let row: Option<Vec<u8>> = self
|
||||
.conn
|
||||
.query_row(
|
||||
"SELECT hash FROM files WHERE project = ?1 AND path = ?2",
|
||||
params![self.project, path.to_string_lossy()],
|
||||
|r| r.get(0),
|
||||
)
|
||||
.optional()?;
|
||||
|
||||
Ok(match row {
|
||||
Some(stored_hash) => stored_hash != hash,
|
||||
None => true,
|
||||
})
|
||||
}
|
||||
|
||||
/// Insert or update the `files` row and return its id.
|
||||
pub fn upsert_file(&self, path: &Path) -> NyxResult<i64> {
|
||||
let bytes = fs::read(path)?;
|
||||
let hash = Self::digest_bytes(&bytes);
|
||||
self.upsert_file_with_hash(path, &hash)
|
||||
}
|
||||
|
||||
/// Insert or update the `files` row using a pre-computed hash.
|
||||
/// Avoids redundant file reads when the caller already has the hash.
|
||||
pub fn upsert_file_with_hash(&self, path: &Path, hash: &[u8]) -> NyxResult<i64> {
|
||||
let meta = fs::metadata(path)?;
|
||||
let mtime = meta.modified()?.duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let scanned_at = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let digest = Self::digest_file(path)?;
|
||||
let path_str = path.to_string_lossy();
|
||||
|
||||
// Use a single statement: upsert then query the id.
|
||||
self.c().execute(
|
||||
"INSERT INTO files (project, path, hash, mtime, scanned_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5)
|
||||
|
|
@ -166,18 +211,12 @@ pub mod index {
|
|||
SET hash = excluded.hash,
|
||||
mtime = excluded.mtime,
|
||||
scanned_at = excluded.scanned_at",
|
||||
params![
|
||||
self.project,
|
||||
path.to_string_lossy(),
|
||||
digest,
|
||||
mtime,
|
||||
scanned_at
|
||||
],
|
||||
params![self.project, path_str, hash, mtime, scanned_at],
|
||||
)?;
|
||||
|
||||
let id: i64 = self.c().query_row(
|
||||
"SELECT id FROM files WHERE project = ?1 AND path = ?2",
|
||||
params![self.project, path.to_string_lossy()],
|
||||
params![self.project, path_str],
|
||||
|r| r.get(0),
|
||||
)?;
|
||||
Ok(id)
|
||||
|
|
@ -287,24 +326,38 @@ pub mod index {
|
|||
}
|
||||
|
||||
/// Load every function summary for this project.
|
||||
///
|
||||
/// Reads all JSON strings from SQLite in one pass, then
|
||||
/// deserializes them in parallel with rayon for large result sets.
|
||||
pub fn load_all_summaries(&self) -> NyxResult<Vec<crate::summary::FuncSummary>> {
|
||||
let mut stmt = self
|
||||
.c()
|
||||
.prepare("SELECT summary FROM function_summaries WHERE project = ?1")?;
|
||||
|
||||
let iter = stmt.query_map([&self.project], |row| {
|
||||
let json: String = row.get(0)?;
|
||||
Ok(json)
|
||||
})?;
|
||||
let jsons: Vec<String> = stmt
|
||||
.query_map([&self.project], |row| row.get::<_, String>(0))?
|
||||
.filter_map(Result::ok)
|
||||
.collect();
|
||||
|
||||
let mut out = Vec::new();
|
||||
for row in iter {
|
||||
let json = row?;
|
||||
let s: crate::summary::FuncSummary = serde_json::from_str(&json)
|
||||
.map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
|
||||
out.push(s);
|
||||
// Parallel JSON deserialization for large sets
|
||||
if jsons.len() > 256 {
|
||||
use rayon::prelude::*;
|
||||
let results: Vec<_> = jsons
|
||||
.par_iter()
|
||||
.filter_map(|json| {
|
||||
serde_json::from_str::<crate::summary::FuncSummary>(json).ok()
|
||||
})
|
||||
.collect();
|
||||
Ok(results)
|
||||
} else {
|
||||
let mut out = Vec::with_capacity(jsons.len());
|
||||
for json in &jsons {
|
||||
if let Ok(s) = serde_json::from_str::<crate::summary::FuncSummary>(json) {
|
||||
out.push(s);
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// gets files from the database
|
||||
|
|
@ -351,12 +404,20 @@ pub mod index {
|
|||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
#[allow(dead_code)] // used by should_scan() and tests
|
||||
fn digest_file(path: &Path) -> NyxResult<Vec<u8>> {
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
let mut file = fs::File::open(path)?;
|
||||
std::io::copy(&mut file, &mut hasher)?;
|
||||
Ok(hasher.finalize().as_bytes().to_vec())
|
||||
}
|
||||
|
||||
/// Hash already-read bytes without re-reading from disk.
|
||||
pub fn digest_bytes(bytes: &[u8]) -> Vec<u8> {
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
hasher.update(bytes);
|
||||
hasher.finalize().as_bytes().to_vec()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,9 +24,13 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf", "sprintf", "strcpy", "strcat"],
|
||||
matchers: &["sprintf", "strcpy", "strcat"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -22,16 +22,13 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"printf",
|
||||
"fprintf",
|
||||
"sprintf",
|
||||
"strcpy",
|
||||
"strcat",
|
||||
"std::cout",
|
||||
],
|
||||
matchers: &["sprintf", "strcpy", "strcat"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -38,6 +38,14 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["innerHTML"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"location.href",
|
||||
"window.location.href",
|
||||
"document.location.href",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::URL_ENCODE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"child_process.exec",
|
||||
|
|
@ -56,6 +64,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"for_in_statement" => Kind::For,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"throw_statement" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_statement" => Kind::Continue,
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ bitflags! {
|
|||
const URL_ENCODE = 0b0000_1000;
|
||||
const JSON_PARSE = 0b0001_0000;
|
||||
const FILE_IO = 0b0010_0000;
|
||||
// todo: add more if needed
|
||||
const FMT_STRING = 0b0100_0000;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -195,6 +195,147 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
|
|||
.unwrap_or(Kind::Other)
|
||||
}
|
||||
|
||||
/// The kind of taint source, used to refine finding severity.
///
/// Produced by `infer_source_kind` from a source's capabilities and callee
/// name, and mapped to a severity by `severity_for_source_kind`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum SourceKind {
|
||||
/// Direct user input (request params, argv, stdin, form data)
|
||||
UserInput,
|
||||
/// Environment variables and configuration
|
||||
EnvironmentConfig,
|
||||
/// File system reads
|
||||
|
||||
FileSystem,
|
||||
/// Database query results
|
||||
|
||||
Database,
|
||||
/// Could not determine — treat conservatively
|
||||
/// (`severity_for_source_kind` rates this the same as user input).
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// Infer the source kind from capabilities and callee name.
|
||||
pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
||||
let cl = callee.to_ascii_lowercase();
|
||||
|
||||
// User input patterns
|
||||
if cl.contains("argv")
|
||||
|| cl.contains("stdin")
|
||||
|| cl.contains("request")
|
||||
|| cl.contains("form")
|
||||
|| cl.contains("query")
|
||||
|| cl.contains("params")
|
||||
|| cl.contains("input")
|
||||
|| cl.contains("body")
|
||||
|| cl.contains("header")
|
||||
|| cl.contains("cookie")
|
||||
{
|
||||
return SourceKind::UserInput;
|
||||
}
|
||||
|
||||
// Environment / config patterns
|
||||
if cl.contains("env")
|
||||
|| cl.contains("getenv")
|
||||
|| cl.contains("environ")
|
||||
|| cl.contains("config")
|
||||
{
|
||||
return SourceKind::EnvironmentConfig;
|
||||
}
|
||||
|
||||
// File system patterns
|
||||
if cl.contains("read") || cl.contains("fopen") || cl.contains("open") {
|
||||
// Distinguish from db reads — file reads typically have FILE_IO cap
|
||||
if caps.contains(Cap::FILE_IO) {
|
||||
return SourceKind::FileSystem;
|
||||
}
|
||||
}
|
||||
|
||||
// Database patterns
|
||||
if cl.contains("fetchone")
|
||||
|| cl.contains("fetchall")
|
||||
|| cl.contains("fetch_row")
|
||||
|| cl.contains("query")
|
||||
|| cl.contains("execute")
|
||||
{
|
||||
// Queries that read back from db
|
||||
return SourceKind::Database;
|
||||
}
|
||||
|
||||
SourceKind::Unknown
|
||||
}
|
||||
|
||||
/// Map a source kind to its appropriate severity level.
|
||||
pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
|
||||
match kind {
|
||||
SourceKind::UserInput => crate::patterns::Severity::High,
|
||||
SourceKind::EnvironmentConfig => crate::patterns::Severity::High,
|
||||
SourceKind::FileSystem => crate::patterns::Severity::Medium,
|
||||
SourceKind::Database => crate::patterns::Severity::Medium,
|
||||
SourceKind::Unknown => crate::patterns::Severity::High,
|
||||
}
|
||||
}
|
||||
|
||||
/// A runtime (config-derived) label rule with owned matchers.
///
/// `build_lang_rules` creates one of these per valid rule in the language
/// configuration.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RuntimeLabelRule {
|
||||
/// Names this rule applies to. In `classify`, a matcher ending in `_` is
/// treated as a prefix matcher; any other matcher must equal the text or be
/// a suffix of it preceded by `.` or `:`. Comparison ignores ASCII case.
pub matchers: Vec<String>,
|
||||
/// Label `classify` returns when one of `matchers` hits.
pub label: DataLabel,
|
||||
}
|
||||
|
||||
/// Parse a capability name string into a `Cap` bitflag.
|
||||
pub fn parse_cap(s: &str) -> Option<Cap> {
|
||||
match s.to_ascii_lowercase().as_str() {
|
||||
"env_var" => Some(Cap::ENV_VAR),
|
||||
"html_escape" => Some(Cap::HTML_ESCAPE),
|
||||
"shell_escape" => Some(Cap::SHELL_ESCAPE),
|
||||
"url_encode" => Some(Cap::URL_ENCODE),
|
||||
"json_parse" => Some(Cap::JSON_PARSE),
|
||||
"file_io" => Some(Cap::FILE_IO),
|
||||
"fmt_string" => Some(Cap::FMT_STRING),
|
||||
"all" => Some(Cap::all()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Pre-built analysis rules for a specific language, derived from config.
|
||||
/// Built once per file and threaded through the pipeline.
///
/// `Default` yields three empty lists; `build_lang_rules` returns that when
/// the config has no entry for the requested language.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct LangAnalysisRules {
|
||||
/// Label rules converted from the language's configured `rules`.
/// NOTE(review): presumably what callers pass to `classify` as `extra` — confirm.
pub extra_labels: Vec<RuntimeLabelRule>,
|
||||
/// Copied verbatim from the language config's `terminators`.
/// NOTE(review): how these are consumed is not visible here — confirm with the CFG builder.
pub terminators: Vec<String>,
|
||||
/// Copied verbatim from the language config's `event_handlers`.
/// NOTE(review): how these are consumed is not visible here — confirm with the CFG builder.
pub event_handlers: Vec<String>,
|
||||
}
|
||||
|
||||
/// Build `LangAnalysisRules` from a `Config` for a given language slug.
|
||||
pub fn build_lang_rules(
|
||||
config: &crate::utils::config::Config,
|
||||
lang_slug: &str,
|
||||
) -> LangAnalysisRules {
|
||||
let Some(lang_cfg) = config.analysis.languages.get(lang_slug) else {
|
||||
return LangAnalysisRules::default();
|
||||
};
|
||||
|
||||
let extra_labels = lang_cfg
|
||||
.rules
|
||||
.iter()
|
||||
.filter_map(|r| {
|
||||
let cap = parse_cap(&r.cap)?;
|
||||
let label = match r.kind.as_str() {
|
||||
"source" => DataLabel::Source(cap),
|
||||
"sanitizer" => DataLabel::Sanitizer(cap),
|
||||
"sink" => DataLabel::Sink(cap),
|
||||
_ => return None,
|
||||
};
|
||||
Some(RuntimeLabelRule {
|
||||
matchers: r.matchers.clone(),
|
||||
label,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
LangAnalysisRules {
|
||||
extra_labels,
|
||||
terminators: lang_cfg.terminators.clone(),
|
||||
event_handlers: lang_cfg.event_handlers.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Case-insensitive suffix check (ASCII).
|
||||
#[inline]
|
||||
fn ends_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
|
||||
|
|
@ -223,29 +364,58 @@ fn starts_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
|
|||
/// Try to classify a piece of syntax text.
|
||||
/// `lang` is the canonicalised language key ("rust", "javascript", ...).
|
||||
///
|
||||
/// If `extra` runtime rules are provided, they are checked **first** (config
|
||||
/// takes priority over built-in rules).
|
||||
///
|
||||
/// **Two-pass matching** -- exact / suffix matches are checked across *all*
|
||||
/// rules before any prefix (`foo_`) match is attempted. This prevents a
|
||||
/// greedy prefix like `sanitize_` from shadowing a more specific exact
|
||||
/// match like `sanitize_shell`.
|
||||
pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
|
||||
// Lang slugs are already lowercase; try direct lookup first to avoid
|
||||
// allocating a lowercased copy.
|
||||
pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> Option<DataLabel> {
|
||||
let head = text.split(['(', '<']).next().unwrap_or("");
|
||||
let trimmed = head.trim().as_bytes();
|
||||
|
||||
// ── Check runtime (config) rules first — they take priority ──────
|
||||
if let Some(extras) = extra {
|
||||
// Pass 1: exact / suffix
|
||||
for rule in extras {
|
||||
for raw in &rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') {
|
||||
continue;
|
||||
}
|
||||
if ends_with_ignore_case(trimmed, m) {
|
||||
let start = trimmed.len() - m.len();
|
||||
let ok = start == 0 || matches!(trimmed[start - 1], b'.' | b':');
|
||||
if ok {
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pass 2: prefix
|
||||
for rule in extras {
|
||||
for raw in &rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') && starts_with_ignore_case(trimmed, m) {
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Built-in static rules ────────────────────────────────────────
|
||||
let rules = REGISTRY.get(lang).or_else(|| {
|
||||
let key = lang.to_ascii_lowercase();
|
||||
REGISTRY.get(key.as_str())
|
||||
})?;
|
||||
|
||||
let head = text.split(['(', '<']).next().unwrap_or("");
|
||||
let trimmed = head.trim().as_bytes();
|
||||
|
||||
// Pass 1: exact / suffix matches (high confidence)
|
||||
// Matchers are already lowercase &'static str, so we compare with
|
||||
// case-insensitive byte helpers — zero heap allocations.
|
||||
for rule in *rules {
|
||||
for raw in rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') {
|
||||
continue; // skip prefix matchers in pass 1
|
||||
continue;
|
||||
}
|
||||
if ends_with_ignore_case(trimmed, m) {
|
||||
let start = trimmed.len() - m.len();
|
||||
|
|
@ -269,3 +439,72 @@ pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
|
|||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// With no runtime rules, `classify` behaves exactly like the built-in table.
    #[test]
    fn classify_none_extra_unchanged() {
        // Built-in rule: innerHTML → Sink(HTML_ESCAPE)
        let result = classify("javascript", "innerHTML", None);
        assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));

        // Non-existent should still be None
        let result = classify("javascript", "myCustomFunc", None);
        assert_eq!(result, None);
    }

    /// Runtime rules are consulted, and built-in rules remain available.
    #[test]
    fn classify_extra_rules_take_priority() {
        let extras = vec![RuntimeLabelRule {
            matchers: vec!["escapeHtml".into()],
            label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
        }];

        let result = classify("javascript", "escapeHtml", Some(&extras));
        assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));

        // Built-in rules still work
        let result = classify("javascript", "innerHTML", Some(&extras));
        assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
    }

    /// A runtime rule for a name that also has a built-in rule wins.
    #[test]
    fn classify_extra_overrides_builtin() {
        // Override innerHTML to be a sanitizer (contrived but tests priority)
        let extras = vec![RuntimeLabelRule {
            matchers: vec!["innerHTML".into()],
            label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
        }];

        let result = classify("javascript", "innerHTML", Some(&extras));
        assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
    }

    #[test]
    fn classify_location_href_is_sink() {
        let result = classify("javascript", "location.href", None);
        assert_eq!(result, Some(DataLabel::Sink(Cap::URL_ENCODE)));
    }

    #[test]
    fn classify_bare_href_is_none() {
        // Bare "href" should NOT be a sink — only "location.href" and variants
        let result = classify("javascript", "href", None);
        assert_eq!(result, None);
    }

    /// Every capability name accepted by `parse_cap` maps to its flag,
    /// names are case-insensitive, and unknown names are rejected.
    #[test]
    fn parse_cap_works() {
        assert_eq!(parse_cap("html_escape"), Some(Cap::HTML_ESCAPE));
        assert_eq!(parse_cap("shell_escape"), Some(Cap::SHELL_ESCAPE));
        assert_eq!(parse_cap("url_encode"), Some(Cap::URL_ENCODE));
        assert_eq!(parse_cap("json_parse"), Some(Cap::JSON_PARSE));
        assert_eq!(parse_cap("env_var"), Some(Cap::ENV_VAR));
        assert_eq!(parse_cap("file_io"), Some(Cap::FILE_IO));
        assert_eq!(parse_cap("fmt_string"), Some(Cap::FMT_STRING));
        assert_eq!(parse_cap("all"), Some(Cap::all()));
        assert_eq!(parse_cap("ALL"), Some(Cap::all()));
        assert_eq!(parse_cap("invalid"), None);
    }
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,19 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["sys.argv"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["open"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"argparse.parse_args",
|
||||
"urllib.request.urlopen",
|
||||
"requests.get",
|
||||
"requests.post",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["html.escape"],
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ pub mod database;
|
|||
pub mod errors;
|
||||
pub mod interop;
|
||||
pub mod labels;
|
||||
pub mod output;
|
||||
pub mod patterns;
|
||||
pub mod summary;
|
||||
pub mod symbol;
|
||||
|
|
@ -25,5 +26,5 @@ use utils::config::Config;
|
|||
/// Run a two-pass scan without index (filesystem only).
|
||||
/// This is the primary entry point for integration tests.
|
||||
pub fn scan_no_index(root: &Path, cfg: &Config) -> NyxResult<Vec<commands::scan::Diag>> {
|
||||
commands::scan::scan_filesystem(root, cfg)
|
||||
commands::scan::scan_filesystem(root, cfg, false)
|
||||
}
|
||||
|
|
|
|||
24
src/main.rs
24
src/main.rs
|
|
@ -7,6 +7,7 @@ mod database;
|
|||
mod errors;
|
||||
mod interop;
|
||||
mod labels;
|
||||
mod output;
|
||||
mod patterns;
|
||||
mod summary;
|
||||
mod symbol;
|
||||
|
|
@ -65,19 +66,28 @@ fn main() -> NyxResult<()> {
|
|||
let database_dir = proj_dirs.data_local_dir();
|
||||
fs::create_dir_all(database_dir)?;
|
||||
|
||||
let mut config = Config::load(config_dir)?;
|
||||
let (mut config, config_note) = Config::load(config_dir)?;
|
||||
|
||||
rayon::ThreadPoolBuilder::new()
|
||||
.stack_size(config.performance.rayon_thread_stack_size)
|
||||
.build_global()
|
||||
.expect("set rayon stack size");
|
||||
|
||||
commands::handle_command(cli.command, database_dir, &mut config)?;
|
||||
let quiet = config.output.quiet || cli.command.is_structured_output();
|
||||
|
||||
println!(
|
||||
"{} in {:.3}s.",
|
||||
style("Finished").green().bold(),
|
||||
now.elapsed().as_secs_f32()
|
||||
);
|
||||
// Print config note before scanning (human-readable mode only).
|
||||
if let Some(note) = config_note.filter(|_| !quiet) {
|
||||
eprint!("{note}");
|
||||
}
|
||||
|
||||
commands::handle_command(cli.command, database_dir, config_dir, &mut config)?;
|
||||
|
||||
if !quiet {
|
||||
println!(
|
||||
"{} in {:.3}s.",
|
||||
style("Finished").green().bold(),
|
||||
now.elapsed().as_secs_f32()
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
152
src/output.rs
Normal file
152
src/output.rs
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
use crate::commands::scan::Diag;
|
||||
use crate::patterns::{self, Severity};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde_json::{Value, json};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
/// Lazily-built global map: pattern ID → description from all language registries.
|
||||
static PATTERN_DESCRIPTIONS: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
for lang in &[
|
||||
"rust",
|
||||
"c",
|
||||
"cpp",
|
||||
"java",
|
||||
"go",
|
||||
"php",
|
||||
"python",
|
||||
"ruby",
|
||||
"javascript",
|
||||
"typescript",
|
||||
] {
|
||||
for p in patterns::load(lang) {
|
||||
map.entry(p.id).or_insert(p.description);
|
||||
}
|
||||
}
|
||||
map
|
||||
});
|
||||
|
||||
/// Describe a CFG-analysis rule that has no entry in the pattern registry.
///
/// Returns `None` for any ID that is not one of the known `cfg-*` rules.
fn cfg_rule_description(id: &str) -> Option<&'static str> {
    const CFG_RULES: [(&str, &str); 6] = [
        (
            "cfg-unguarded-sink",
            "Dangerous sink reachable without prior guard or sanitizer",
        ),
        ("cfg-unreachable-sink", "Sink in unreachable code"),
        (
            "cfg-auth-gap",
            "Entry-point handler reaches sink without authentication check",
        ),
        (
            "cfg-error-fallthrough",
            "Error check does not terminate; dangerous call follows on error path",
        ),
        (
            "cfg-resource-leak",
            "Resource acquired but not released on all exit paths",
        ),
        (
            "cfg-lock-not-released",
            "Lock acquired but not released on all exit paths",
        ),
    ];

    CFG_RULES
        .iter()
        .find(|(rule_id, _)| *rule_id == id)
        .map(|&(_, text)| text)
}
|
||||
|
||||
/// Look up a human-readable description for any rule ID.
|
||||
fn rule_description(id: &str) -> &str {
|
||||
// Strip taint-specific suffix for lookup (e.g. "taint-unsanitised-flow:foo.rs:42" → base)
|
||||
let base_id = if id.starts_with("taint-") {
|
||||
"taint-unsanitised-flow"
|
||||
} else {
|
||||
id
|
||||
};
|
||||
|
||||
if let Some(desc) = PATTERN_DESCRIPTIONS.get(base_id) {
|
||||
return desc;
|
||||
}
|
||||
if let Some(desc) = cfg_rule_description(base_id) {
|
||||
return desc;
|
||||
}
|
||||
if base_id == "taint-unsanitised-flow" {
|
||||
return "Unsanitised data flows from source to sink";
|
||||
}
|
||||
id
|
||||
}
|
||||
|
||||
fn severity_to_level(sev: Severity) -> &'static str {
|
||||
match sev {
|
||||
Severity::High => "error",
|
||||
Severity::Medium => "warning",
|
||||
Severity::Low => "note",
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a SARIF 2.1.0 JSON value from a list of diagnostics.
|
||||
pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
||||
// Deduplicate rule IDs and build rules array.
|
||||
let mut rule_ids: Vec<String> = Vec::new();
|
||||
let mut rule_index_map: HashMap<String, usize> = HashMap::new();
|
||||
|
||||
for d in diags {
|
||||
let base = if d.id.starts_with("taint-") {
|
||||
"taint-unsanitised-flow".to_string()
|
||||
} else {
|
||||
d.id.clone()
|
||||
};
|
||||
if !rule_index_map.contains_key(&base) {
|
||||
let idx = rule_ids.len();
|
||||
rule_index_map.insert(base.clone(), idx);
|
||||
rule_ids.push(base);
|
||||
}
|
||||
}
|
||||
|
||||
let rules: Vec<Value> = rule_ids
|
||||
.iter()
|
||||
.map(|id| {
|
||||
json!({
|
||||
"id": id,
|
||||
"shortDescription": { "text": rule_description(id) },
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
let results: Vec<Value> = diags
|
||||
.iter()
|
||||
.map(|d| {
|
||||
let base = if d.id.starts_with("taint-") {
|
||||
"taint-unsanitised-flow"
|
||||
} else {
|
||||
&d.id
|
||||
};
|
||||
let rule_index = rule_index_map[base];
|
||||
|
||||
// Make path relative to scan root if possible
|
||||
let uri = Path::new(&d.path)
|
||||
.strip_prefix(scan_root)
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|_| d.path.clone());
|
||||
|
||||
json!({
|
||||
"ruleId": base,
|
||||
"ruleIndex": rule_index,
|
||||
"level": severity_to_level(d.severity),
|
||||
"message": { "text": rule_description(base) },
|
||||
"locations": [{
|
||||
"physicalLocation": {
|
||||
"artifactLocation": { "uri": uri },
|
||||
"region": {
|
||||
"startLine": d.line,
|
||||
"startColumn": d.col
|
||||
}
|
||||
}
|
||||
}]
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
json!({
|
||||
"$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json",
|
||||
"version": "2.1.0",
|
||||
"runs": [{
|
||||
"tool": {
|
||||
"driver": {
|
||||
"name": "nyx",
|
||||
"version": env!("CARGO_PKG_VERSION"),
|
||||
"informationUri": env!("CARGO_PKG_HOMEPAGE"),
|
||||
"rules": rules
|
||||
}
|
||||
},
|
||||
"results": results
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
|
@ -54,9 +54,10 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
description: "Assignment to window.location / location.href",
|
||||
query: "(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier)? @obj
|
||||
object: (identifier) @obj
|
||||
(#match? @obj \"^(window|location|document|self|top|parent|frames)$\")
|
||||
property: (property_identifier) @prop
|
||||
(#match? @prop \"location|href\"))) @vuln",
|
||||
(#match? @prop \"^(location|href)$\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
|
|
@ -77,7 +78,7 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
left: (member_expression
|
||||
property: (property_identifier) @prop
|
||||
(#eq? @prop \"__proto__\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "weak_hash_md5",
|
||||
|
|
|
|||
|
|
@ -23,14 +23,33 @@ pub enum Severity {
|
|||
Low,
|
||||
}
|
||||
|
||||
impl Severity {
|
||||
/// Bracketed, colored, fixed-width tag for aligned console output.
|
||||
///
|
||||
/// Returns e.g. `"[HIGH] "` or `"[MEDIUM]"` — always 8 visible characters
|
||||
/// so the column after the tag lines up regardless of severity.
|
||||
pub fn colored_tag(self) -> String {
|
||||
// Visible widths: "[HIGH]" = 6, "[MEDIUM]" = 8, "[LOW]" = 5.
|
||||
// Pad the *whole* tag to 8 visible chars (the longest, "[MEDIUM]").
|
||||
let (label, styled_fn): (&str, fn(&str) -> String) = match self {
|
||||
Severity::High => ("HIGH", |s| style(s).red().bold().to_string()),
|
||||
Severity::Medium => ("MEDIUM", |s| style(s).yellow().bold().to_string()),
|
||||
Severity::Low => ("LOW", |s| style(s).cyan().bold().to_string()),
|
||||
};
|
||||
let bracket_len = label.len() + 2; // "[" + label + "]"
|
||||
let pad = 8usize.saturating_sub(bracket_len);
|
||||
format!("[{}]{:pad$}", styled_fn(label), "", pad = pad)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Severity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let s = match *self {
|
||||
let styled = match *self {
|
||||
Severity::High => style("HIGH").red().bold().to_string(),
|
||||
Severity::Medium => style("MEDIUM").yellow().bold().to_string(),
|
||||
Severity::Low => style("LOW").cyan().bold().to_string(),
|
||||
};
|
||||
f.write_str(&s)
|
||||
f.write_str(&styled)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -209,6 +209,13 @@ impl GlobalSummaries {
|
|||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Merge another `GlobalSummaries` into this one (for parallel fold/reduce).
|
||||
pub fn merge(&mut self, other: GlobalSummaries) {
|
||||
for (key, summary) in other.by_key {
|
||||
self.insert(key, summary);
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.by_key.is_empty()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::cfg::{Cfg, FuncSummaries, NodeInfo, StmtKind};
|
||||
use crate::interop::InteropEdge;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::labels::{Cap, DataLabel, SourceKind};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::graph::NodeIndex;
|
||||
|
|
@ -18,18 +18,28 @@ pub struct Finding {
|
|||
/// The full path from source to sink through the CFG.
|
||||
#[allow(dead_code)] // used for future detailed diagnostics / path display
|
||||
pub path: Vec<NodeIndex>,
|
||||
/// The kind of source that originated the taint.
|
||||
pub source_kind: SourceKind,
|
||||
}
|
||||
|
||||
/// Order-independent hash of a taint map.
|
||||
///
|
||||
/// Uses XOR of per-entry hashes so the result is the same regardless of
|
||||
/// iteration order — no allocation or sorting required.
|
||||
fn taint_hash(taint: &HashMap<String, Cap>) -> u64 {
|
||||
let mut v: Vec<_> = taint.iter().collect();
|
||||
v.sort_by_key(|(k, _)| k.as_str());
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
for (k, bits) in v {
|
||||
hasher.update(k.as_bytes());
|
||||
hasher.update(&bits.bits().to_le_bytes());
|
||||
let mut h: u64 = 0;
|
||||
for (k, bits) in taint {
|
||||
// Per-entry hash: FNV-1a-style mixing of key bytes + cap bits.
|
||||
let mut entry_h: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
|
||||
for b in k.as_bytes() {
|
||||
entry_h ^= *b as u64;
|
||||
entry_h = entry_h.wrapping_mul(0x0100_0000_01b3); // FNV prime
|
||||
}
|
||||
entry_h ^= bits.bits() as u64;
|
||||
entry_h = entry_h.wrapping_mul(0x0100_0000_01b3);
|
||||
h ^= entry_h;
|
||||
}
|
||||
let digest = hasher.finalize();
|
||||
u64::from_le_bytes(digest.as_bytes()[0..8].try_into().unwrap())
|
||||
h
|
||||
}
|
||||
|
||||
/// Resolved summary for a callee — a uniform view regardless of whether the
|
||||
|
|
@ -140,18 +150,21 @@ fn resolve_callee(
|
|||
None
|
||||
}
|
||||
|
||||
/// Apply taint transfer for a single node, mutating `out` in place.
|
||||
///
|
||||
/// Callers should clone the taint map before calling if they need
|
||||
/// the original state preserved.
|
||||
fn apply_taint(
|
||||
node: &NodeInfo,
|
||||
taint: &HashMap<String, Cap>,
|
||||
out: &mut HashMap<String, Cap>,
|
||||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
caller_lang: Lang,
|
||||
caller_namespace: &str,
|
||||
interop_edges: &[InteropEdge],
|
||||
) -> HashMap<String, Cap> {
|
||||
) {
|
||||
debug!(target: "taint", "Applying taint to node: {:?}", node);
|
||||
debug!(target: "taint", "Taint: {:?}", taint);
|
||||
let mut out = taint.clone();
|
||||
debug!(target: "taint", "Taint: {:?}", out);
|
||||
|
||||
let caller_func = node.enclosing_func.as_deref().unwrap_or("");
|
||||
|
||||
|
|
@ -236,7 +249,7 @@ fn apply_taint(
|
|||
// ── Sink behaviour: handled in the main analysis loop
|
||||
// (checked via node.label or resolved summary) ──
|
||||
|
||||
return out;
|
||||
return;
|
||||
}
|
||||
|
||||
// Unresolved call — fall through to default gen/kill below
|
||||
|
|
@ -264,8 +277,6 @@ fn apply_taint(
|
|||
out.insert(d.clone(), combined);
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Run taint analysis on a single file's CFG.
|
||||
|
|
@ -309,9 +320,10 @@ pub fn analyse_file(
|
|||
|
||||
while let Some(Item { node, taint }) = q.pop_front() {
|
||||
let caller_func = cfg[node].enclosing_func.as_deref().unwrap_or("");
|
||||
let out = apply_taint(
|
||||
let mut out = taint.clone();
|
||||
apply_taint(
|
||||
&cfg[node],
|
||||
&taint,
|
||||
&mut out,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
|
|
@ -398,26 +410,44 @@ pub fn analyse_file(
|
|||
}
|
||||
|
||||
path.reverse();
|
||||
|
||||
// Infer the source kind from the source node's label and callee
|
||||
let source_kind = match cfg[source_node].label {
|
||||
Some(DataLabel::Source(caps)) => {
|
||||
let callee = cfg[source_node].callee.as_deref().unwrap_or("");
|
||||
crate::labels::infer_source_kind(caps, callee)
|
||||
}
|
||||
_ => SourceKind::Unknown,
|
||||
};
|
||||
|
||||
findings.push(Finding {
|
||||
sink: sink_node,
|
||||
source: source_node,
|
||||
path,
|
||||
source_kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// enqueue successors
|
||||
for succ in cfg.neighbors(node) {
|
||||
let h = taint_hash(&out);
|
||||
let key = (succ, h);
|
||||
// enqueue successors — cache hashes to avoid recomputation
|
||||
let out_h = taint_hash(&out);
|
||||
let in_h = taint_hash(&taint);
|
||||
let succs: Vec<_> = cfg.neighbors(node).collect();
|
||||
for (i, succ) in succs.iter().enumerate() {
|
||||
let key = (*succ, out_h);
|
||||
if !seen.contains(&key) {
|
||||
seen.insert(key);
|
||||
pred.insert(key, (node, taint_hash(&taint)));
|
||||
let item = Item {
|
||||
node: succ,
|
||||
taint: out.clone(),
|
||||
pred.insert(key, (node, in_h));
|
||||
// Move the map into the last successor to avoid a clone
|
||||
let taint_for_succ = if i + 1 == succs.len() {
|
||||
std::mem::take(&mut out)
|
||||
} else {
|
||||
out.clone()
|
||||
};
|
||||
q.push_back(item);
|
||||
q.push_back(Item {
|
||||
node: *succ,
|
||||
taint: taint_for_succ,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ fn env_to_arg_is_flagged() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
|
||||
assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
|
||||
|
|
@ -49,7 +49,7 @@ fn taint_through_if_else() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
|
||||
// exactly one path (via the True branch) should be flagged
|
||||
|
|
@ -76,7 +76,7 @@ fn taint_through_while_loop() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(findings.len(), 1);
|
||||
}
|
||||
|
|
@ -102,7 +102,7 @@ fn taint_killed_by_matching_sanitizer() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert!(
|
||||
findings.is_empty(),
|
||||
|
|
@ -131,7 +131,7 @@ fn wrong_sanitizer_preserves_taint() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
|
|
@ -160,7 +160,7 @@ fn taint_breaks_out_of_loop() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(findings.len(), 1);
|
||||
}
|
||||
|
|
@ -189,7 +189,7 @@ fn test_two_sources_one_sanitised() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
|
|
@ -222,7 +222,7 @@ fn test_two_sources_wrong_sanitiser_both_flagged() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
|
|
@ -250,7 +250,7 @@ fn test_should_not_panic_on_empty_function() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert!(findings.is_empty());
|
||||
}
|
||||
|
|
@ -374,7 +374,7 @@ fn parse_rust(src: &[u8]) -> (Cfg, NodeIndex, FuncSummaries) {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
build_cfg(&tree, src, "rust", "test.rs")
|
||||
build_cfg(&tree, src, "rust", "test.rs", None)
|
||||
}
|
||||
|
||||
/// Parse Rust source bytes, build CFG, and export cross-file summaries.
|
||||
|
|
@ -1089,7 +1089,7 @@ fn parse_lang(
|
|||
"ruby" => "test.rb",
|
||||
_ => "test.txt",
|
||||
};
|
||||
build_cfg(&tree, src, slug, ext)
|
||||
build_cfg(&tree, src, slug, ext, None)
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -2206,7 +2206,7 @@ fn return_call_recognized_as_source() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (_, _, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (_, _, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let exported = export_summaries(&summaries, "test.rs", "rust");
|
||||
|
||||
let foo = exported
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use crate::errors::NyxResult;
|
|||
use crate::patterns::Severity;
|
||||
use console::style;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use toml;
|
||||
|
|
@ -55,6 +56,11 @@ pub struct ScannerConfig {
|
|||
|
||||
/// Whether to scan hidden files or not.
|
||||
pub scan_hidden_files: bool,
|
||||
|
||||
/// Whether to include findings from non-production paths (tests, vendor,
|
||||
/// benchmarks, etc.) at their original severity. When false (default),
|
||||
/// findings in these paths are downgraded by one severity tier.
|
||||
pub include_nonprod: bool,
|
||||
}
|
||||
impl Default for ScannerConfig {
|
||||
fn default() -> Self {
|
||||
|
|
@ -87,6 +93,7 @@ impl Default for ScannerConfig {
|
|||
one_file_system: false,
|
||||
follow_symlinks: false,
|
||||
scan_hidden_files: false,
|
||||
include_nonprod: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -103,7 +110,7 @@ pub struct DatabaseConfig {
|
|||
/// The maximum size of the database, in megabytes. TODO: IMPLEMENT
|
||||
pub max_db_size_mb: u64,
|
||||
|
||||
/// Whether to run a VACUUM on startup or not. TODO: IMPLEMENT
|
||||
/// Whether to run a VACUUM on startup or not.
|
||||
pub vacuum_on_startup: bool,
|
||||
}
|
||||
impl Default for DatabaseConfig {
|
||||
|
|
@ -120,10 +127,10 @@ impl Default for DatabaseConfig {
|
|||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
pub struct OutputConfig {
|
||||
/// The default output format. TODO: IMPLEMENT others
|
||||
/// The default output format.
|
||||
pub default_format: String,
|
||||
|
||||
/// Whether to print anything to the console or not. TODO: IMPLEMENT
|
||||
/// Whether to print anything to the console or not.
|
||||
pub quiet: bool,
|
||||
|
||||
/// The maximum number of results to show.
|
||||
|
|
@ -147,10 +154,10 @@ pub struct PerformanceConfig {
|
|||
///
|
||||
/// A depth of `1` includes all files under the current directory, a depth of `2` also includes
|
||||
/// all files under subdirectories of the current directory, etc.
|
||||
pub max_depth: Option<usize>, // TODO: IMPLEMENT
|
||||
pub max_depth: Option<usize>,
|
||||
|
||||
/// The minimum depth for reported entries, or `None`.
|
||||
pub min_depth: Option<usize>, // TODO: IMPLEMENT
|
||||
pub min_depth: Option<usize>,
|
||||
|
||||
/// Whether to stop traversing into matching directories.
|
||||
pub prune: bool,
|
||||
|
|
@ -190,6 +197,33 @@ impl Default for PerformanceConfig {
|
|||
}
|
||||
}
|
||||
|
||||
/// A single user-defined label rule from config.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub struct ConfigLabelRule {
|
||||
pub matchers: Vec<String>,
|
||||
/// "source", "sanitizer", or "sink"
|
||||
pub kind: String,
|
||||
/// Capability name: "html_escape", "shell_escape", "url_encode", "json_parse",
|
||||
/// "env_var", "file_io", or "all"
|
||||
pub cap: String,
|
||||
}
|
||||
|
||||
/// Per-language analysis configuration from config file.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
|
||||
#[serde(default)]
|
||||
pub struct LanguageAnalysisConfig {
|
||||
pub rules: Vec<ConfigLabelRule>,
|
||||
pub terminators: Vec<String>,
|
||||
pub event_handlers: Vec<String>,
|
||||
}
|
||||
|
||||
/// Top-level analysis rules config, keyed by language slug.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
|
||||
#[serde(default)]
|
||||
pub struct AnalysisRulesConfig {
|
||||
pub languages: HashMap<String, LanguageAnalysisConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
#[derive(Default)]
|
||||
|
|
@ -198,10 +232,16 @@ pub struct Config {
|
|||
pub database: DatabaseConfig,
|
||||
pub output: OutputConfig,
|
||||
pub performance: PerformanceConfig,
|
||||
pub analysis: AnalysisRulesConfig,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
pub fn load(config_dir: &Path) -> NyxResult<Self> {
|
||||
/// Load config and return `(config, optional_note)`.
|
||||
///
|
||||
/// The note is a formatted status message about which config file was
|
||||
/// loaded (or that defaults are in use). The caller decides whether to
|
||||
/// print it based on output format / quiet mode.
|
||||
pub fn load(config_dir: &Path) -> NyxResult<(Self, Option<String>)> {
|
||||
let mut config = Config::default();
|
||||
|
||||
let default_config_path = config_dir.join("nyx.conf");
|
||||
|
|
@ -210,33 +250,33 @@ impl Config {
|
|||
}
|
||||
|
||||
let user_config_path = config_dir.join("nyx.local");
|
||||
if user_config_path.exists() {
|
||||
let note = if user_config_path.exists() {
|
||||
let user_config_content = fs::read_to_string(&user_config_path)?;
|
||||
let user_config: Config = toml::from_str(&user_config_content)?;
|
||||
|
||||
config = merge_configs(config, user_config);
|
||||
|
||||
println!(
|
||||
Some(format!(
|
||||
"{}: Loaded user config from: {}\n",
|
||||
style("note").green().bold(),
|
||||
style(user_config_path.display())
|
||||
.underlined()
|
||||
.white()
|
||||
.bold()
|
||||
);
|
||||
))
|
||||
} else {
|
||||
println!(
|
||||
"{}: Using {} configuration.\n Create file in '{}'to customize.\n",
|
||||
Some(format!(
|
||||
"{}: Using {} configuration.\n Create file in '{}' to customize.\n",
|
||||
style("note").green().bold(),
|
||||
style("default").bold(),
|
||||
style(user_config_path.display())
|
||||
.underlined()
|
||||
.white()
|
||||
.bold()
|
||||
);
|
||||
}
|
||||
))
|
||||
};
|
||||
|
||||
Ok(config)
|
||||
Ok((config, note))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -262,6 +302,7 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
default.scanner.one_file_system = user.scanner.one_file_system;
|
||||
default.scanner.follow_symlinks = user.scanner.follow_symlinks;
|
||||
default.scanner.scan_hidden_files = user.scanner.scan_hidden_files;
|
||||
default.scanner.include_nonprod = user.scanner.include_nonprod;
|
||||
|
||||
// Merge exclusion lists (default ⊔ user), then sort & dedupe
|
||||
default
|
||||
|
|
@ -299,6 +340,32 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
default.performance.scan_timeout_secs = user.performance.scan_timeout_secs;
|
||||
default.performance.memory_limit_mb = user.performance.memory_limit_mb;
|
||||
|
||||
// --- AnalysisRulesConfig ---
|
||||
for (lang, user_lang_cfg) in user.analysis.languages {
|
||||
let entry = default.analysis.languages.entry(lang).or_default();
|
||||
|
||||
// Union-merge rules with dedup
|
||||
for rule in user_lang_cfg.rules {
|
||||
if !entry.rules.contains(&rule) {
|
||||
entry.rules.push(rule);
|
||||
}
|
||||
}
|
||||
|
||||
// Union-merge terminators with dedup
|
||||
for t in user_lang_cfg.terminators {
|
||||
if !entry.terminators.contains(&t) {
|
||||
entry.terminators.push(t);
|
||||
}
|
||||
}
|
||||
|
||||
// Union-merge event_handlers with dedup
|
||||
for eh in user_lang_cfg.event_handlers {
|
||||
if !entry.event_handlers.contains(&eh) {
|
||||
entry.event_handlers.push(eh);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
default
|
||||
}
|
||||
|
||||
|
|
@ -318,6 +385,72 @@ fn merge_configs_dedupes_and_keeps_order() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_analysis_rules_unions_and_dedupes() {
|
||||
let mut default_cfg = Config::default();
|
||||
default_cfg.analysis.languages.insert(
|
||||
"javascript".into(),
|
||||
LanguageAnalysisConfig {
|
||||
rules: vec![ConfigLabelRule {
|
||||
matchers: vec!["escapeHtml".into()],
|
||||
kind: "sanitizer".into(),
|
||||
cap: "html_escape".into(),
|
||||
}],
|
||||
terminators: vec!["process.exit".into()],
|
||||
event_handlers: vec![],
|
||||
},
|
||||
);
|
||||
|
||||
let mut user_cfg = Config::default();
|
||||
user_cfg.analysis.languages.insert(
|
||||
"javascript".into(),
|
||||
LanguageAnalysisConfig {
|
||||
rules: vec![
|
||||
ConfigLabelRule {
|
||||
matchers: vec!["escapeHtml".into()],
|
||||
kind: "sanitizer".into(),
|
||||
cap: "html_escape".into(),
|
||||
},
|
||||
ConfigLabelRule {
|
||||
matchers: vec!["sanitizeUrl".into()],
|
||||
kind: "sanitizer".into(),
|
||||
cap: "url_encode".into(),
|
||||
},
|
||||
],
|
||||
terminators: vec!["process.exit".into(), "abort".into()],
|
||||
event_handlers: vec!["addEventListener".into()],
|
||||
},
|
||||
);
|
||||
|
||||
let merged = merge_configs(default_cfg, user_cfg);
|
||||
let js = merged.analysis.languages.get("javascript").unwrap();
|
||||
assert_eq!(js.rules.len(), 2); // deduped
|
||||
assert_eq!(js.terminators, vec!["process.exit", "abort"]);
|
||||
assert_eq!(js.event_handlers, vec!["addEventListener"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn analysis_config_toml_roundtrip() {
|
||||
let toml_str = r#"
|
||||
[analysis.languages.javascript]
|
||||
terminators = ["process.exit"]
|
||||
event_handlers = ["addEventListener"]
|
||||
|
||||
[[analysis.languages.javascript.rules]]
|
||||
matchers = ["escapeHtml"]
|
||||
kind = "sanitizer"
|
||||
cap = "html_escape"
|
||||
"#;
|
||||
let cfg: Config = toml::from_str(toml_str).unwrap();
|
||||
let js = cfg.analysis.languages.get("javascript").unwrap();
|
||||
assert_eq!(js.rules.len(), 1);
|
||||
assert_eq!(js.rules[0].matchers, vec!["escapeHtml"]);
|
||||
assert_eq!(js.rules[0].kind, "sanitizer");
|
||||
assert_eq!(js.rules[0].cap, "html_escape");
|
||||
assert_eq!(js.terminators, vec!["process.exit"]);
|
||||
assert_eq!(js.event_handlers, vec!["addEventListener"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn load_creates_example_and_reads_user_overrides() {
|
||||
let cfg_dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -333,7 +466,7 @@ fn load_creates_example_and_reads_user_overrides() {
|
|||
"#;
|
||||
fs::write(cfg_path.join("nyx.local"), user_toml).unwrap();
|
||||
|
||||
let cfg = Config::load(cfg_path).expect("Config::load should succeed");
|
||||
let (cfg, _note) = Config::load(cfg_path).expect("Config::load should succeed");
|
||||
|
||||
assert!(cfg_path.join("nyx.conf").is_file());
|
||||
|
||||
|
|
|
|||
17
src/walk.rs
17
src/walk.rs
|
|
@ -61,6 +61,11 @@ fn build_overrides(root: &Path, cfg: &Config) -> ignore::overrides::Override {
|
|||
tracing::warn!("invalid exclude‐dir pattern ‘{dir}’: {e}");
|
||||
}
|
||||
}
|
||||
for file in &cfg.scanner.excluded_files {
|
||||
if let Err(e) = ob.add(&format!("!{file}")) {
|
||||
tracing::warn!("invalid exclude‐file pattern ‘{file}’: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
ob.build().unwrap_or_else(|e| {
|
||||
tracing::error!("failed to build ignore overrides: {e}");
|
||||
|
|
@ -83,6 +88,9 @@ pub fn spawn_file_walker(root: &Path, cfg: &Config) -> (Receiver<Paths>, JoinHan
|
|||
let follow = cfg.scanner.follow_symlinks;
|
||||
let max_bytes = cfg.scanner.max_file_size_mb.unwrap_or(0) * 1_048_576;
|
||||
let batch_size = cfg.performance.batch_size;
|
||||
let max_depth = cfg.performance.max_depth;
|
||||
let same_file_system = cfg.scanner.one_file_system;
|
||||
let require_git = cfg.scanner.require_git_to_read_vcsignore;
|
||||
|
||||
// ----- 3 the background walker thread ---------------------------------
|
||||
let handle = thread::spawn(move || {
|
||||
|
|
@ -96,11 +104,18 @@ pub fn spawn_file_walker(root: &Path, cfg: &Config) -> (Receiver<Paths>, JoinHan
|
|||
"starting directory walk"
|
||||
);
|
||||
|
||||
WalkBuilder::new(root)
|
||||
let mut builder = WalkBuilder::new(root);
|
||||
builder
|
||||
.hidden(!scan_hidden)
|
||||
.follow_links(follow)
|
||||
.threads(workers)
|
||||
.overrides(overrides)
|
||||
.same_file_system(same_file_system)
|
||||
.require_git(require_git);
|
||||
if let Some(depth) = max_depth {
|
||||
builder.max_depth(Some(depth));
|
||||
}
|
||||
builder
|
||||
.filter_entry(|e| {
|
||||
e.file_type()
|
||||
.map(|ft| ft.is_dir() || ft.is_file())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue