mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Feat/configurable sanitizers and js precision (#32)
* chore: Exclude CLAUDE.md from Cargo.toml
* feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators
* feat: Enhance resource management and analysis efficiency
  - Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance.
  - Introduced `GlobalSummaries::merge()` for efficient merging of summaries.
  - Optimized file reading and hashing to eliminate redundant I/O operations.
  - Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing.
  - Enhanced taint analysis with in-place mutations to reduce memory allocations.
  - Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions.
* feat: Implement severity downgrade for findings in non-production paths and add source kind inference
* feat: Update versioning information in SECURITY.md for new stable line
* feat: Update categories in Cargo.toml to include parser-implementations and text-processing
* feat: Update dependencies in Cargo.lock for improved compatibility and performance
* feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
parent
f96a89e7c1
commit
19b578c5c4
37 changed files with 3775 additions and 432 deletions
|
|
@ -1,6 +1,6 @@
|
|||
use crate::cfg::{Cfg, FuncSummaries, NodeInfo, StmtKind};
|
||||
use crate::interop::InteropEdge;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::labels::{Cap, DataLabel, SourceKind};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::graph::NodeIndex;
|
||||
|
|
@ -18,18 +18,28 @@ pub struct Finding {
|
|||
/// The full path from source to sink through the CFG.
|
||||
#[allow(dead_code)] // used for future detailed diagnostics / path display
|
||||
pub path: Vec<NodeIndex>,
|
||||
/// The kind of source that originated the taint.
|
||||
pub source_kind: SourceKind,
|
||||
}
|
||||
|
||||
/// Order-independent hash of a taint map.
|
||||
///
|
||||
/// Uses XOR of per-entry hashes so the result is the same regardless of
|
||||
/// iteration order — no allocation or sorting required.
|
||||
fn taint_hash(taint: &HashMap<String, Cap>) -> u64 {
|
||||
let mut v: Vec<_> = taint.iter().collect();
|
||||
v.sort_by_key(|(k, _)| k.as_str());
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
for (k, bits) in v {
|
||||
hasher.update(k.as_bytes());
|
||||
hasher.update(&bits.bits().to_le_bytes());
|
||||
let mut h: u64 = 0;
|
||||
for (k, bits) in taint {
|
||||
// Per-entry hash: FNV-1a-style mixing of key bytes + cap bits.
|
||||
let mut entry_h: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
|
||||
for b in k.as_bytes() {
|
||||
entry_h ^= *b as u64;
|
||||
entry_h = entry_h.wrapping_mul(0x0100_0000_01b3); // FNV prime
|
||||
}
|
||||
entry_h ^= bits.bits() as u64;
|
||||
entry_h = entry_h.wrapping_mul(0x0100_0000_01b3);
|
||||
h ^= entry_h;
|
||||
}
|
||||
let digest = hasher.finalize();
|
||||
u64::from_le_bytes(digest.as_bytes()[0..8].try_into().unwrap())
|
||||
h
|
||||
}
|
||||
|
||||
/// Resolved summary for a callee — a uniform view regardless of whether the
|
||||
|
|
@ -140,18 +150,21 @@ fn resolve_callee(
|
|||
None
|
||||
}
|
||||
|
||||
/// Apply taint transfer for a single node, mutating `out` in place.
|
||||
///
|
||||
/// Callers should clone the taint map before calling if they need
|
||||
/// the original state preserved.
|
||||
fn apply_taint(
|
||||
node: &NodeInfo,
|
||||
taint: &HashMap<String, Cap>,
|
||||
out: &mut HashMap<String, Cap>,
|
||||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
caller_lang: Lang,
|
||||
caller_namespace: &str,
|
||||
interop_edges: &[InteropEdge],
|
||||
) -> HashMap<String, Cap> {
|
||||
) {
|
||||
debug!(target: "taint", "Applying taint to node: {:?}", node);
|
||||
debug!(target: "taint", "Taint: {:?}", taint);
|
||||
let mut out = taint.clone();
|
||||
debug!(target: "taint", "Taint: {:?}", out);
|
||||
|
||||
let caller_func = node.enclosing_func.as_deref().unwrap_or("");
|
||||
|
||||
|
|
@ -236,7 +249,7 @@ fn apply_taint(
|
|||
// ── Sink behaviour: handled in the main analysis loop
|
||||
// (checked via node.label or resolved summary) ──
|
||||
|
||||
return out;
|
||||
return;
|
||||
}
|
||||
|
||||
// Unresolved call — fall through to default gen/kill below
|
||||
|
|
@ -264,8 +277,6 @@ fn apply_taint(
|
|||
out.insert(d.clone(), combined);
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Run taint analysis on a single file's CFG.
|
||||
|
|
@ -309,9 +320,10 @@ pub fn analyse_file(
|
|||
|
||||
while let Some(Item { node, taint }) = q.pop_front() {
|
||||
let caller_func = cfg[node].enclosing_func.as_deref().unwrap_or("");
|
||||
let out = apply_taint(
|
||||
let mut out = taint.clone();
|
||||
apply_taint(
|
||||
&cfg[node],
|
||||
&taint,
|
||||
&mut out,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
|
|
@ -398,26 +410,44 @@ pub fn analyse_file(
|
|||
}
|
||||
|
||||
path.reverse();
|
||||
|
||||
// Infer the source kind from the source node's label and callee
|
||||
let source_kind = match cfg[source_node].label {
|
||||
Some(DataLabel::Source(caps)) => {
|
||||
let callee = cfg[source_node].callee.as_deref().unwrap_or("");
|
||||
crate::labels::infer_source_kind(caps, callee)
|
||||
}
|
||||
_ => SourceKind::Unknown,
|
||||
};
|
||||
|
||||
findings.push(Finding {
|
||||
sink: sink_node,
|
||||
source: source_node,
|
||||
path,
|
||||
source_kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// enqueue successors
|
||||
for succ in cfg.neighbors(node) {
|
||||
let h = taint_hash(&out);
|
||||
let key = (succ, h);
|
||||
// enqueue successors — cache hashes to avoid recomputation
|
||||
let out_h = taint_hash(&out);
|
||||
let in_h = taint_hash(&taint);
|
||||
let succs: Vec<_> = cfg.neighbors(node).collect();
|
||||
for (i, succ) in succs.iter().enumerate() {
|
||||
let key = (*succ, out_h);
|
||||
if !seen.contains(&key) {
|
||||
seen.insert(key);
|
||||
pred.insert(key, (node, taint_hash(&taint)));
|
||||
let item = Item {
|
||||
node: succ,
|
||||
taint: out.clone(),
|
||||
pred.insert(key, (node, in_h));
|
||||
// Move the map into the last successor to avoid a clone
|
||||
let taint_for_succ = if i + 1 == succs.len() {
|
||||
std::mem::take(&mut out)
|
||||
} else {
|
||||
out.clone()
|
||||
};
|
||||
q.push_back(item);
|
||||
q.push_back(Item {
|
||||
node: *succ,
|
||||
taint: taint_for_succ,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ fn env_to_arg_is_flagged() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
|
||||
assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
|
||||
|
|
@ -49,7 +49,7 @@ fn taint_through_if_else() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
|
||||
// exactly one path (via the True branch) should be flagged
|
||||
|
|
@ -76,7 +76,7 @@ fn taint_through_while_loop() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(findings.len(), 1);
|
||||
}
|
||||
|
|
@ -102,7 +102,7 @@ fn taint_killed_by_matching_sanitizer() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert!(
|
||||
findings.is_empty(),
|
||||
|
|
@ -131,7 +131,7 @@ fn wrong_sanitizer_preserves_taint() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
|
|
@ -160,7 +160,7 @@ fn taint_breaks_out_of_loop() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(findings.len(), 1);
|
||||
}
|
||||
|
|
@ -189,7 +189,7 @@ fn test_two_sources_one_sanitised() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
|
|
@ -222,7 +222,7 @@ fn test_two_sources_wrong_sanitiser_both_flagged() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
|
|
@ -250,7 +250,7 @@ fn test_should_not_panic_on_empty_function() {
|
|||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
assert!(findings.is_empty());
|
||||
}
|
||||
|
|
@ -374,7 +374,7 @@ fn parse_rust(src: &[u8]) -> (Cfg, NodeIndex, FuncSummaries) {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
build_cfg(&tree, src, "rust", "test.rs")
|
||||
build_cfg(&tree, src, "rust", "test.rs", None)
|
||||
}
|
||||
|
||||
/// Parse Rust source bytes, build CFG, and export cross-file summaries.
|
||||
|
|
@ -1089,7 +1089,7 @@ fn parse_lang(
|
|||
"ruby" => "test.rb",
|
||||
_ => "test.txt",
|
||||
};
|
||||
build_cfg(&tree, src, slug, ext)
|
||||
build_cfg(&tree, src, slug, ext, None)
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -2206,7 +2206,7 @@ fn return_call_recognized_as_source() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (_, _, summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (_, _, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let exported = export_summaries(&summaries, "test.rs", "rust");
|
||||
|
||||
let foo = exported
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue