mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Feat/configurable sanitizers and js precision (#32)
* chore: Exclude CLAUDE.md from Cargo.toml * feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators * feat: Enhance resource management and analysis efficiency - Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance. - Introduced `GlobalSummaries::merge()` for efficient merging of summaries. - Optimized file reading and hashing to eliminate redundant I/O operations. - Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing. - Enhanced taint analysis with in-place mutations to reduce memory allocations. - Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions. * feat: Implement severity downgrade for findings in non-production paths and add source kind inference * feat: Update versioning information in SECURITY.md for new stable line * feat: Update categories in Cargo.toml to include parser-implementations and text-processing * feat: Update dependencies in Cargo.lock for improved compatibility and performance * feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
parent
f96a89e7c1
commit
19b578c5c4
37 changed files with 3775 additions and 432 deletions
|
|
@ -24,9 +24,13 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf", "sprintf", "strcpy", "strcat"],
|
||||
matchers: &["sprintf", "strcpy", "strcat"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -22,16 +22,13 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"printf",
|
||||
"fprintf",
|
||||
"sprintf",
|
||||
"strcpy",
|
||||
"strcat",
|
||||
"std::cout",
|
||||
],
|
||||
matchers: &["sprintf", "strcpy", "strcat"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -38,6 +38,14 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["innerHTML"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"location.href",
|
||||
"window.location.href",
|
||||
"document.location.href",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::URL_ENCODE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"child_process.exec",
|
||||
|
|
@ -56,6 +64,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"for_in_statement" => Kind::For,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"throw_statement" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_statement" => Kind::Continue,
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ bitflags! {
|
|||
const URL_ENCODE = 0b0000_1000;
|
||||
const JSON_PARSE = 0b0001_0000;
|
||||
const FILE_IO = 0b0010_0000;
|
||||
// todo: add more if needed
|
||||
const FMT_STRING = 0b0100_0000;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -195,6 +195,147 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
|
|||
.unwrap_or(Kind::Other)
|
||||
}
|
||||
|
||||
/// Classification of where tainted data originates.
///
/// The variant chosen for a source is used to refine the severity of the
/// resulting finding.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SourceKind {
    /// Data supplied directly by a user: request params, argv, stdin, form data.
    UserInput,
    /// Values read from environment variables or configuration.
    EnvironmentConfig,
    /// Data read from the file system.
    FileSystem,
    /// Results returned by a database query.
    Database,
    /// Origin could not be determined; treated conservatively.
    Unknown,
}
|
||||
|
||||
/// Infer the source kind from capabilities and callee name.
|
||||
pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
||||
let cl = callee.to_ascii_lowercase();
|
||||
|
||||
// User input patterns
|
||||
if cl.contains("argv")
|
||||
|| cl.contains("stdin")
|
||||
|| cl.contains("request")
|
||||
|| cl.contains("form")
|
||||
|| cl.contains("query")
|
||||
|| cl.contains("params")
|
||||
|| cl.contains("input")
|
||||
|| cl.contains("body")
|
||||
|| cl.contains("header")
|
||||
|| cl.contains("cookie")
|
||||
{
|
||||
return SourceKind::UserInput;
|
||||
}
|
||||
|
||||
// Environment / config patterns
|
||||
if cl.contains("env")
|
||||
|| cl.contains("getenv")
|
||||
|| cl.contains("environ")
|
||||
|| cl.contains("config")
|
||||
{
|
||||
return SourceKind::EnvironmentConfig;
|
||||
}
|
||||
|
||||
// File system patterns
|
||||
if cl.contains("read") || cl.contains("fopen") || cl.contains("open") {
|
||||
// Distinguish from db reads — file reads typically have FILE_IO cap
|
||||
if caps.contains(Cap::FILE_IO) {
|
||||
return SourceKind::FileSystem;
|
||||
}
|
||||
}
|
||||
|
||||
// Database patterns
|
||||
if cl.contains("fetchone")
|
||||
|| cl.contains("fetchall")
|
||||
|| cl.contains("fetch_row")
|
||||
|| cl.contains("query")
|
||||
|| cl.contains("execute")
|
||||
{
|
||||
// Queries that read back from db
|
||||
return SourceKind::Database;
|
||||
}
|
||||
|
||||
SourceKind::Unknown
|
||||
}
|
||||
|
||||
/// Map a source kind to its appropriate severity level.
|
||||
pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
|
||||
match kind {
|
||||
SourceKind::UserInput => crate::patterns::Severity::High,
|
||||
SourceKind::EnvironmentConfig => crate::patterns::Severity::High,
|
||||
SourceKind::FileSystem => crate::patterns::Severity::Medium,
|
||||
SourceKind::Database => crate::patterns::Severity::Medium,
|
||||
SourceKind::Unknown => crate::patterns::Severity::High,
|
||||
}
|
||||
}
|
||||
|
||||
/// A runtime (config-derived) label rule with owned matchers.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RuntimeLabelRule {
|
||||
pub matchers: Vec<String>,
|
||||
pub label: DataLabel,
|
||||
}
|
||||
|
||||
/// Parse a capability name string into a `Cap` bitflag.
|
||||
pub fn parse_cap(s: &str) -> Option<Cap> {
|
||||
match s.to_ascii_lowercase().as_str() {
|
||||
"env_var" => Some(Cap::ENV_VAR),
|
||||
"html_escape" => Some(Cap::HTML_ESCAPE),
|
||||
"shell_escape" => Some(Cap::SHELL_ESCAPE),
|
||||
"url_encode" => Some(Cap::URL_ENCODE),
|
||||
"json_parse" => Some(Cap::JSON_PARSE),
|
||||
"file_io" => Some(Cap::FILE_IO),
|
||||
"fmt_string" => Some(Cap::FMT_STRING),
|
||||
"all" => Some(Cap::all()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Pre-built analysis rules for a specific language, derived from config.
|
||||
/// Built once per file and threaded through the pipeline.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct LangAnalysisRules {
|
||||
pub extra_labels: Vec<RuntimeLabelRule>,
|
||||
pub terminators: Vec<String>,
|
||||
pub event_handlers: Vec<String>,
|
||||
}
|
||||
|
||||
/// Build `LangAnalysisRules` from a `Config` for a given language slug.
|
||||
pub fn build_lang_rules(
|
||||
config: &crate::utils::config::Config,
|
||||
lang_slug: &str,
|
||||
) -> LangAnalysisRules {
|
||||
let Some(lang_cfg) = config.analysis.languages.get(lang_slug) else {
|
||||
return LangAnalysisRules::default();
|
||||
};
|
||||
|
||||
let extra_labels = lang_cfg
|
||||
.rules
|
||||
.iter()
|
||||
.filter_map(|r| {
|
||||
let cap = parse_cap(&r.cap)?;
|
||||
let label = match r.kind.as_str() {
|
||||
"source" => DataLabel::Source(cap),
|
||||
"sanitizer" => DataLabel::Sanitizer(cap),
|
||||
"sink" => DataLabel::Sink(cap),
|
||||
_ => return None,
|
||||
};
|
||||
Some(RuntimeLabelRule {
|
||||
matchers: r.matchers.clone(),
|
||||
label,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
LangAnalysisRules {
|
||||
extra_labels,
|
||||
terminators: lang_cfg.terminators.clone(),
|
||||
event_handlers: lang_cfg.event_handlers.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Case-insensitive suffix check (ASCII).
|
||||
#[inline]
|
||||
fn ends_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
|
||||
|
|
@ -223,29 +364,58 @@ fn starts_with_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
|
|||
/// Try to classify a piece of syntax text.
|
||||
/// `lang` is the canonicalised language key ("rust", "javascript", ...).
|
||||
///
|
||||
/// If `extra` runtime rules are provided, they are checked **first** (config
|
||||
/// takes priority over built-in rules).
|
||||
///
|
||||
/// **Two-pass matching** -- exact / suffix matches are checked across *all*
|
||||
/// rules before any prefix (`foo_`) match is attempted. This prevents a
|
||||
/// greedy prefix like `sanitize_` from shadowing a more specific exact
|
||||
/// match like `sanitize_shell`.
|
||||
pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
|
||||
// Lang slugs are already lowercase; try direct lookup first to avoid
|
||||
// allocating a lowercased copy.
|
||||
pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> Option<DataLabel> {
|
||||
let head = text.split(['(', '<']).next().unwrap_or("");
|
||||
let trimmed = head.trim().as_bytes();
|
||||
|
||||
// ── Check runtime (config) rules first — they take priority ──────
|
||||
if let Some(extras) = extra {
|
||||
// Pass 1: exact / suffix
|
||||
for rule in extras {
|
||||
for raw in &rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') {
|
||||
continue;
|
||||
}
|
||||
if ends_with_ignore_case(trimmed, m) {
|
||||
let start = trimmed.len() - m.len();
|
||||
let ok = start == 0 || matches!(trimmed[start - 1], b'.' | b':');
|
||||
if ok {
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pass 2: prefix
|
||||
for rule in extras {
|
||||
for raw in &rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') && starts_with_ignore_case(trimmed, m) {
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Built-in static rules ────────────────────────────────────────
|
||||
let rules = REGISTRY.get(lang).or_else(|| {
|
||||
let key = lang.to_ascii_lowercase();
|
||||
REGISTRY.get(key.as_str())
|
||||
})?;
|
||||
|
||||
let head = text.split(['(', '<']).next().unwrap_or("");
|
||||
let trimmed = head.trim().as_bytes();
|
||||
|
||||
// Pass 1: exact / suffix matches (high confidence)
|
||||
// Matchers are already lowercase &'static str, so we compare with
|
||||
// case-insensitive byte helpers — zero heap allocations.
|
||||
for rule in *rules {
|
||||
for raw in rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') {
|
||||
continue; // skip prefix matchers in pass 1
|
||||
continue;
|
||||
}
|
||||
if ends_with_ignore_case(trimmed, m) {
|
||||
let start = trimmed.len() - m.len();
|
||||
|
|
@ -269,3 +439,72 @@ pub fn classify(lang: &str, text: &str) -> Option<DataLabel> {
|
|||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Passing no runtime rules leaves built-in classification untouched.
    #[test]
    fn classify_none_extra_unchanged() {
        // Built-in rule: innerHTML → Sink(HTML_ESCAPE)
        let result = classify("javascript", "innerHTML", None);
        assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));

        // Non-existent should still be None
        let result = classify("javascript", "myCustomFunc", None);
        assert_eq!(result, None);
    }

    /// Runtime rules are consulted, and built-in rules still apply afterwards.
    #[test]
    fn classify_extra_rules_take_priority() {
        let extras = vec![RuntimeLabelRule {
            matchers: vec!["escapeHtml".into()],
            label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
        }];

        let result = classify("javascript", "escapeHtml", Some(&extras));
        assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));

        // Built-in rules still work
        let result = classify("javascript", "innerHTML", Some(&extras));
        assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
    }

    /// A runtime rule for a name that also has a built-in rule wins.
    #[test]
    fn classify_extra_overrides_builtin() {
        // Override innerHTML to be a sanitizer (contrived but tests priority)
        let extras = vec![RuntimeLabelRule {
            matchers: vec!["innerHTML".into()],
            label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
        }];

        let result = classify("javascript", "innerHTML", Some(&extras));
        assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
    }

    #[test]
    fn classify_location_href_is_sink() {
        let result = classify("javascript", "location.href", None);
        assert_eq!(result, Some(DataLabel::Sink(Cap::URL_ENCODE)));
    }

    #[test]
    fn classify_bare_href_is_none() {
        // Bare "href" should NOT be a sink — only "location.href" and variants
        let result = classify("javascript", "href", None);
        assert_eq!(result, None);
    }

    #[test]
    fn parse_cap_works() {
        assert_eq!(parse_cap("html_escape"), Some(Cap::HTML_ESCAPE));
        assert_eq!(parse_cap("shell_escape"), Some(Cap::SHELL_ESCAPE));
        assert_eq!(parse_cap("url_encode"), Some(Cap::URL_ENCODE));
        assert_eq!(parse_cap("json_parse"), Some(Cap::JSON_PARSE));
        assert_eq!(parse_cap("env_var"), Some(Cap::ENV_VAR));
        assert_eq!(parse_cap("file_io"), Some(Cap::FILE_IO));
        // FMT_STRING is a parseable capability name too.
        assert_eq!(parse_cap("fmt_string"), Some(Cap::FMT_STRING));
        assert_eq!(parse_cap("all"), Some(Cap::all()));
        // Names are matched without regard to ASCII case.
        assert_eq!(parse_cap("ALL"), Some(Cap::all()));
        assert_eq!(parse_cap("Fmt_String"), Some(Cap::FMT_STRING));
        assert_eq!(parse_cap("invalid"), None);
    }

    /// Categories are checked in order: user input, env/config, file system
    /// (FILE_IO only), database.
    #[test]
    fn infer_source_kind_precedence() {
        assert_eq!(
            infer_source_kind(Cap::empty(), "sys.argv"),
            SourceKind::UserInput
        );
        assert_eq!(
            infer_source_kind(Cap::empty(), "os.getenv"),
            SourceKind::EnvironmentConfig
        );
        // read/open names need the FILE_IO capability to count as file reads.
        assert_eq!(
            infer_source_kind(Cap::FILE_IO, "fopen"),
            SourceKind::FileSystem
        );
        assert_eq!(infer_source_kind(Cap::empty(), "fopen"), SourceKind::Unknown);
        assert_eq!(
            infer_source_kind(Cap::empty(), "cursor.fetchall"),
            SourceKind::Database
        );
        // "query" is claimed by the user-input category, which is checked first.
        assert_eq!(
            infer_source_kind(Cap::empty(), "db.query"),
            SourceKind::UserInput
        );
    }

    #[test]
    fn severity_for_source_kind_mapping() {
        use crate::patterns::Severity;

        assert!(matches!(
            severity_for_source_kind(SourceKind::UserInput),
            Severity::High
        ));
        assert!(matches!(
            severity_for_source_kind(SourceKind::EnvironmentConfig),
            Severity::High
        ));
        assert!(matches!(
            severity_for_source_kind(SourceKind::Unknown),
            Severity::High
        ));
        assert!(matches!(
            severity_for_source_kind(SourceKind::FileSystem),
            Severity::Medium
        ));
        assert!(matches!(
            severity_for_source_kind(SourceKind::Database),
            Severity::Medium
        ));
    }
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,19 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["sys.argv"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["open"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"argparse.parse_args",
|
||||
"urllib.request.urlopen",
|
||||
"requests.get",
|
||||
"requests.post",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["html.escape"],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue