Feat/configurable sanitizers and JS precision (#32)

* chore: Exclude CLAUDE.md from Cargo.toml

* feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators

* feat: Enhance resource management and analysis efficiency

- Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance.
- Introduced `GlobalSummaries::merge()` for efficient merging of summaries.
- Optimized file reading and hashing to eliminate redundant I/O operations.
- Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing.
- Enhanced taint analysis with in-place mutations to reduce memory allocations.
- Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions.

* feat: Implement severity downgrade for findings in non-production paths and add source kind inference

* feat: Update versioning information in SECURITY.md for new stable line

* feat: Update categories in Cargo.toml to include parser-implementations and text-processing

* feat: Update dependencies in Cargo.lock for improved compatibility and performance

* feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
Eli Peter 2026-02-25 04:02:11 -05:00 committed by GitHub
parent f96a89e7c1
commit 19b578c5c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 3775 additions and 432 deletions

View file

@@ -10,7 +10,7 @@ mod tests;
pub mod unreachable;
use crate::cfg::{FuncSummaries, NodeInfo, StmtKind};
use crate::labels::DataLabel;
use crate::labels::{DataLabel, LangAnalysisRules};
use crate::patterns::Severity;
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
@@ -51,6 +51,11 @@ pub struct AnalysisContext<'a> {
#[allow(dead_code)]
pub global_summaries: Option<&'a GlobalSummaries>,
pub taint_findings: &'a [taint::Finding],
pub analysis_rules: Option<&'a LangAnalysisRules>,
/// Whether full taint analysis was active for this file (global summaries
/// existed and taint engine ran). When false, structural findings without
/// taint confirmation should be treated with lower confidence.
pub taint_active: bool,
}
pub trait CfgAnalysis {
@@ -87,6 +92,20 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
true
});
// ── Dedup: suppress cfg-unguarded-sink when cfg-unreachable-sink covers the span ──
let unreachable_spans: HashSet<(usize, usize)> = findings
.iter()
.filter(|f| f.rule_id == "cfg-unreachable-sink")
.map(|f| f.span)
.collect();
findings.retain(|f| {
if f.rule_id == "cfg-unguarded-sink" && unreachable_spans.contains(&f.span) {
return false;
}
true
});
scoring::score_findings(&mut findings, ctx);
findings.sort_by(|a, b| {
b.score
@@ -97,11 +116,36 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
}
/// Helper: check whether a node is a guard call (validate, sanitize, check, etc.).
pub(crate) fn is_guard_call(info: &NodeInfo, lang: Lang) -> bool {
pub(crate) fn is_guard_call(
info: &NodeInfo,
lang: Lang,
analysis_rules: Option<&LangAnalysisRules>,
) -> bool {
if info.kind != StmtKind::Call {
return false;
}
if let Some(callee) = &info.callee {
// Check config sanitizer rules
if let Some(extras) = analysis_rules {
let callee_lower = callee.to_ascii_lowercase();
for rule in &extras.extra_labels {
if !matches!(rule.label, DataLabel::Sanitizer(_)) {
continue;
}
for m in &rule.matchers {
let ml = m.to_ascii_lowercase();
if ml.ends_with('_') {
if callee_lower.starts_with(&ml) {
return true;
}
} else if callee_lower.ends_with(&ml) {
return true;
}
}
}
}
// Check built-in guard rules
let guard_rules = rules::guard_rules(lang);
let callee_lower = callee.to_ascii_lowercase();
for rule in guard_rules {