mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
Phase 1 (#33)
* chore: Exclude CLAUDE.md from Cargo.toml
* feat: add callgraph module and integrate into main analysis flow
* feat: enhance CLI with new severity filtering and analysis modes
* feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling
* feat: implement state-model dataflow analysis for resource lifecycle and auth state
* feat: enhance diagnostic output formatting and add evidence structure
* feat: implement attack surface ranking for diagnostics with scoring and sorting
* feat: add comprehensive documentation for installation, usage, and rules reference
* feat: add multiple language support for command execution and evaluation endpoints
* feat: implement inline suppression for findings using `nyx:ignore` comments
* feat: add confidence levels to AST patterns and update output structure
* feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets
* feat: bump version to 0.4.0 and update changelog with new features and improvements
* feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
This commit is contained in:
parent
19b578c5c4
commit
1bbe4b1cfb
456 changed files with 25628 additions and 1228 deletions
432
src/ast.rs
432
src/ast.rs
|
|
@ -2,8 +2,10 @@ use crate::cfg::{build_cfg, export_summaries};
|
|||
use crate::cfg_analysis;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::errors::{NyxError, NyxResult};
|
||||
use crate::evidence::{Evidence, SpanEvidence, StateEvidence};
|
||||
use crate::labels::{build_lang_rules, severity_for_source_kind};
|
||||
use crate::patterns::Severity;
|
||||
use crate::patterns::{FindingCategory, Severity};
|
||||
use crate::state;
|
||||
use crate::summary::{FuncSummary, GlobalSummaries};
|
||||
use crate::symbol::{Lang, normalize_namespace};
|
||||
use crate::taint::analyse_file;
|
||||
|
|
@ -92,6 +94,23 @@ fn is_nonprod_path(path: &Path) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
/// Normalize a callee description for display.
|
||||
fn sanitize_desc(s: &str) -> String {
|
||||
crate::fmt::normalize_snippet(s)
|
||||
}
|
||||
|
||||
/// Human-readable label for a `SourceKind`.
|
||||
fn source_kind_label(sk: crate::labels::SourceKind) -> &'static str {
|
||||
use crate::labels::SourceKind;
|
||||
match sk {
|
||||
SourceKind::UserInput => "user input",
|
||||
SourceKind::EnvironmentConfig => "environment config",
|
||||
SourceKind::FileSystem => "file system data",
|
||||
SourceKind::Database => "database result",
|
||||
SourceKind::Unknown => "tainted data",
|
||||
}
|
||||
}
|
||||
|
||||
/// Downgrade severity by one tier: High→Medium, Medium→Low, Low→Low.
|
||||
fn downgrade_severity(s: Severity) -> Severity {
|
||||
match s {
|
||||
|
|
@ -239,8 +258,45 @@ pub fn run_rules_on_bytes(
|
|||
let source_byte = cfg_graph[finding.source].span.0;
|
||||
let source_point = byte_offset_to_point(&_tree, source_byte);
|
||||
|
||||
let source_callee = cfg_graph[finding.source]
|
||||
.callee
|
||||
.as_deref()
|
||||
.map(sanitize_desc)
|
||||
.unwrap_or_else(|| "(unknown)".into());
|
||||
let sink_callee = cfg_graph[finding.sink]
|
||||
.callee
|
||||
.as_deref()
|
||||
.map(sanitize_desc)
|
||||
.unwrap_or_else(|| "(unknown)".into());
|
||||
let kind_label = source_kind_label(finding.source_kind);
|
||||
|
||||
let short_source = crate::fmt::shorten_callee(&source_callee);
|
||||
let short_sink = crate::fmt::shorten_callee(&sink_callee);
|
||||
|
||||
let mut labels = vec![
|
||||
(
|
||||
"Source".into(),
|
||||
format!(
|
||||
"{source_callee} ({}:{})",
|
||||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
),
|
||||
),
|
||||
("Sink".into(), sink_callee.to_string()),
|
||||
];
|
||||
if let Some(guard) = finding.guard_kind {
|
||||
labels.push(("Path guard".into(), format!("{guard:?}")));
|
||||
}
|
||||
|
||||
let file_path_owned = path.to_string_lossy().into_owned();
|
||||
let mut evidence_notes = Vec::new();
|
||||
if finding.path_validated {
|
||||
evidence_notes.push("path_validated".into());
|
||||
}
|
||||
evidence_notes.push(format!("source_kind:{:?}", finding.source_kind));
|
||||
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
path: file_path_owned.clone(),
|
||||
line: sink_point.row + 1,
|
||||
col: sink_point.column + 1,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
|
|
@ -249,6 +305,50 @@ pub fn run_rules_on_bytes(
|
|||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
),
|
||||
category: FindingCategory::Security,
|
||||
path_validated: finding.path_validated,
|
||||
guard_kind: finding.guard_kind.map(|k| format!("{k:?}")),
|
||||
message: Some(format!(
|
||||
"unsanitised {kind_label} flows from {short_source} \u{2192} {short_sink}"
|
||||
)),
|
||||
labels,
|
||||
confidence: None,
|
||||
evidence: Some(Evidence {
|
||||
source: Some(SpanEvidence {
|
||||
path: file_path_owned.clone(),
|
||||
line: (source_point.row + 1) as u32,
|
||||
col: (source_point.column + 1) as u32,
|
||||
kind: "source".into(),
|
||||
snippet: Some(short_source.clone()),
|
||||
}),
|
||||
sink: Some(SpanEvidence {
|
||||
path: file_path_owned,
|
||||
line: (sink_point.row + 1) as u32,
|
||||
col: (sink_point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: Some(short_sink.clone()),
|
||||
}),
|
||||
guards: finding
|
||||
.guard_kind
|
||||
.map(|g| {
|
||||
vec![SpanEvidence {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: (sink_point.row + 1) as u32,
|
||||
col: 0,
|
||||
kind: "guard".into(),
|
||||
snippet: Some(format!("{g:?}")),
|
||||
}]
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
sanitizers: vec![],
|
||||
state: None,
|
||||
notes: evidence_notes,
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -268,14 +368,111 @@ pub fn run_rules_on_bytes(
|
|||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&_tree, cf.span.0);
|
||||
let cfg_confidence = Some(match cf.confidence {
|
||||
cfg_analysis::Confidence::High => crate::evidence::Confidence::High,
|
||||
cfg_analysis::Confidence::Medium => crate::evidence::Confidence::Medium,
|
||||
cfg_analysis::Confidence::Low => crate::evidence::Confidence::Low,
|
||||
});
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cf.severity,
|
||||
id: cf.rule_id,
|
||||
category: FindingCategory::Security,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(cf.message),
|
||||
labels: vec![],
|
||||
confidence: cfg_confidence,
|
||||
evidence: Some(Evidence {
|
||||
source: None,
|
||||
sink: Some(SpanEvidence {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: (point.row + 1) as u32,
|
||||
col: (point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: None,
|
||||
}),
|
||||
guards: vec![],
|
||||
sanitizers: vec![],
|
||||
state: None,
|
||||
notes: vec![],
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
|
||||
// ── State-model dataflow analysis ────────────────────────────────
|
||||
if cfg.scanner.enable_state_analysis {
|
||||
let state_findings = state::run_state_analysis(
|
||||
&cfg_graph,
|
||||
entry,
|
||||
caller_lang,
|
||||
bytes,
|
||||
&summaries,
|
||||
global_summaries,
|
||||
);
|
||||
// Collect state finding lines to dedup overlapping CFG findings.
|
||||
let state_lines: std::collections::HashSet<usize> = state_findings
|
||||
.iter()
|
||||
.map(|sf| byte_offset_to_point(&_tree, sf.span.0).row + 1)
|
||||
.collect();
|
||||
|
||||
for sf in &state_findings {
|
||||
let point = byte_offset_to_point(&_tree, sf.span.0);
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: sf.severity,
|
||||
id: sf.rule_id.clone(),
|
||||
category: FindingCategory::Security,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(sf.message.clone()),
|
||||
labels: vec![],
|
||||
confidence: None,
|
||||
evidence: Some(Evidence {
|
||||
source: None,
|
||||
sink: Some(SpanEvidence {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: (point.row + 1) as u32,
|
||||
col: (point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: None,
|
||||
}),
|
||||
guards: vec![],
|
||||
sanitizers: vec![],
|
||||
state: Some(StateEvidence {
|
||||
machine: sf.machine.into(),
|
||||
subject: sf.subject.clone(),
|
||||
from_state: sf.from_state.into(),
|
||||
to_state: sf.to_state.into(),
|
||||
}),
|
||||
notes: vec![],
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Suppress cfg-resource-leak / cfg-auth-gap when state analysis
|
||||
// already covers the same line (state analysis is more precise).
|
||||
if !state_findings.is_empty() {
|
||||
out.retain(|d| {
|
||||
!((d.id == "cfg-resource-leak" || d.id == "cfg-auth-gap")
|
||||
&& state_lines.contains(&d.line))
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
|
||||
|
|
@ -285,7 +482,7 @@ pub fn run_rules_on_bytes(
|
|||
let mut cursor = QueryCursor::new();
|
||||
|
||||
for cq in compiled.iter() {
|
||||
if cfg.scanner.min_severity <= cq.meta.severity {
|
||||
if cq.meta.severity > cfg.scanner.min_severity {
|
||||
continue;
|
||||
}
|
||||
let mut matches = cursor.matches(&cq.query, root, bytes);
|
||||
|
|
@ -298,6 +495,31 @@ pub fn run_rules_on_bytes(
|
|||
col: point.column + 1,
|
||||
severity: cq.meta.severity,
|
||||
id: cq.meta.id.to_owned(),
|
||||
category: cq.meta.category.finding_category(),
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(cq.meta.description.to_owned()),
|
||||
labels: vec![],
|
||||
confidence: Some(cq.meta.confidence),
|
||||
evidence: Some(Evidence {
|
||||
source: None,
|
||||
sink: Some(SpanEvidence {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: (point.row + 1) as u32,
|
||||
col: (point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: None,
|
||||
}),
|
||||
guards: vec![],
|
||||
sanitizers: vec![],
|
||||
state: None,
|
||||
notes: vec![],
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -427,8 +649,45 @@ pub fn analyse_file_fused(
|
|||
let source_byte = cfg_graph[finding.source].span.0;
|
||||
let source_point = byte_offset_to_point(&tree, source_byte);
|
||||
|
||||
let source_callee = cfg_graph[finding.source]
|
||||
.callee
|
||||
.as_deref()
|
||||
.map(sanitize_desc)
|
||||
.unwrap_or_else(|| "(unknown)".into());
|
||||
let sink_callee = cfg_graph[finding.sink]
|
||||
.callee
|
||||
.as_deref()
|
||||
.map(sanitize_desc)
|
||||
.unwrap_or_else(|| "(unknown)".into());
|
||||
let kind_label = source_kind_label(finding.source_kind);
|
||||
|
||||
let short_source = crate::fmt::shorten_callee(&source_callee);
|
||||
let short_sink = crate::fmt::shorten_callee(&sink_callee);
|
||||
|
||||
let mut labels = vec![
|
||||
(
|
||||
"Source".into(),
|
||||
format!(
|
||||
"{source_callee} ({}:{})",
|
||||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
),
|
||||
),
|
||||
("Sink".into(), sink_callee.to_string()),
|
||||
];
|
||||
if let Some(guard) = finding.guard_kind {
|
||||
labels.push(("Path guard".into(), format!("{guard:?}")));
|
||||
}
|
||||
|
||||
let fused_file_path = path.to_string_lossy().into_owned();
|
||||
let mut fused_evidence_notes = Vec::new();
|
||||
if finding.path_validated {
|
||||
fused_evidence_notes.push("path_validated".into());
|
||||
}
|
||||
fused_evidence_notes.push(format!("source_kind:{:?}", finding.source_kind));
|
||||
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
path: fused_file_path.clone(),
|
||||
line: sink_point.row + 1,
|
||||
col: sink_point.column + 1,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
|
|
@ -437,6 +696,50 @@ pub fn analyse_file_fused(
|
|||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
),
|
||||
category: FindingCategory::Security,
|
||||
path_validated: finding.path_validated,
|
||||
guard_kind: finding.guard_kind.map(|k| format!("{k:?}")),
|
||||
message: Some(format!(
|
||||
"unsanitised {kind_label} flows from {short_source} \u{2192} {short_sink}"
|
||||
)),
|
||||
labels,
|
||||
confidence: None,
|
||||
evidence: Some(Evidence {
|
||||
source: Some(SpanEvidence {
|
||||
path: fused_file_path.clone(),
|
||||
line: (source_point.row + 1) as u32,
|
||||
col: (source_point.column + 1) as u32,
|
||||
kind: "source".into(),
|
||||
snippet: Some(short_source.clone()),
|
||||
}),
|
||||
sink: Some(SpanEvidence {
|
||||
path: fused_file_path.clone(),
|
||||
line: (sink_point.row + 1) as u32,
|
||||
col: (sink_point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: Some(short_sink.clone()),
|
||||
}),
|
||||
guards: finding
|
||||
.guard_kind
|
||||
.map(|g| {
|
||||
vec![SpanEvidence {
|
||||
path: fused_file_path,
|
||||
line: (sink_point.row + 1) as u32,
|
||||
col: 0,
|
||||
kind: "guard".into(),
|
||||
snippet: Some(format!("{g:?}")),
|
||||
}]
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
sanitizers: vec![],
|
||||
state: None,
|
||||
notes: fused_evidence_notes,
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -455,14 +758,108 @@ pub fn analyse_file_fused(
|
|||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&tree, cf.span.0);
|
||||
let fused_cfg_confidence = Some(match cf.confidence {
|
||||
cfg_analysis::Confidence::High => crate::evidence::Confidence::High,
|
||||
cfg_analysis::Confidence::Medium => crate::evidence::Confidence::Medium,
|
||||
cfg_analysis::Confidence::Low => crate::evidence::Confidence::Low,
|
||||
});
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: cf.severity,
|
||||
id: cf.rule_id,
|
||||
category: FindingCategory::Security,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(cf.message),
|
||||
labels: vec![],
|
||||
confidence: fused_cfg_confidence,
|
||||
evidence: Some(Evidence {
|
||||
source: None,
|
||||
sink: Some(SpanEvidence {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: (point.row + 1) as u32,
|
||||
col: (point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: None,
|
||||
}),
|
||||
guards: vec![],
|
||||
sanitizers: vec![],
|
||||
state: None,
|
||||
notes: vec![],
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
|
||||
// ── State-model dataflow analysis ────────────────────────────────
|
||||
if cfg.scanner.enable_state_analysis {
|
||||
let state_findings = state::run_state_analysis(
|
||||
&cfg_graph,
|
||||
entry,
|
||||
caller_lang,
|
||||
bytes,
|
||||
&local_summaries,
|
||||
global_summaries,
|
||||
);
|
||||
let state_lines: std::collections::HashSet<usize> = state_findings
|
||||
.iter()
|
||||
.map(|sf| byte_offset_to_point(&tree, sf.span.0).row + 1)
|
||||
.collect();
|
||||
|
||||
for sf in &state_findings {
|
||||
let point = byte_offset_to_point(&tree, sf.span.0);
|
||||
out.push(Diag {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: point.row + 1,
|
||||
col: point.column + 1,
|
||||
severity: sf.severity,
|
||||
id: sf.rule_id.clone(),
|
||||
category: FindingCategory::Security,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(sf.message.clone()),
|
||||
labels: vec![],
|
||||
confidence: None,
|
||||
evidence: Some(Evidence {
|
||||
source: None,
|
||||
sink: Some(SpanEvidence {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: (point.row + 1) as u32,
|
||||
col: (point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: None,
|
||||
}),
|
||||
guards: vec![],
|
||||
sanitizers: vec![],
|
||||
state: Some(StateEvidence {
|
||||
machine: sf.machine.into(),
|
||||
subject: sf.subject.clone(),
|
||||
from_state: sf.from_state.into(),
|
||||
to_state: sf.to_state.into(),
|
||||
}),
|
||||
notes: vec![],
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
|
||||
if !state_findings.is_empty() {
|
||||
out.retain(|d| {
|
||||
!((d.id == "cfg-resource-leak" || d.id == "cfg-auth-gap")
|
||||
&& state_lines.contains(&d.line))
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AST pattern queries
|
||||
|
|
@ -472,7 +869,7 @@ pub fn analyse_file_fused(
|
|||
let mut cursor = QueryCursor::new();
|
||||
|
||||
for cq in compiled.iter() {
|
||||
if cfg.scanner.min_severity <= cq.meta.severity {
|
||||
if cq.meta.severity > cfg.scanner.min_severity {
|
||||
continue;
|
||||
}
|
||||
let mut matches = cursor.matches(&cq.query, root, bytes);
|
||||
|
|
@ -485,6 +882,31 @@ pub fn analyse_file_fused(
|
|||
col: point.column + 1,
|
||||
severity: cq.meta.severity,
|
||||
id: cq.meta.id.to_owned(),
|
||||
category: cq.meta.category.finding_category(),
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: Some(cq.meta.description.to_owned()),
|
||||
labels: vec![],
|
||||
confidence: Some(cq.meta.confidence),
|
||||
evidence: Some(Evidence {
|
||||
source: None,
|
||||
sink: Some(SpanEvidence {
|
||||
path: path.to_string_lossy().into_owned(),
|
||||
line: (point.row + 1) as u32,
|
||||
col: (point.column + 1) as u32,
|
||||
kind: "sink".into(),
|
||||
snippet: None,
|
||||
}),
|
||||
guards: vec![],
|
||||
sanitizers: vec![],
|
||||
state: None,
|
||||
notes: vec![],
|
||||
}),
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
599
src/callgraph.rs
Normal file
599
src/callgraph.rs
Normal file
|
|
@ -0,0 +1,599 @@
|
|||
use crate::interop::InteropEdge;
|
||||
use crate::summary::{CalleeResolution, GlobalSummaries};
|
||||
use crate::symbol::FuncKey;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use petgraph::prelude::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Types
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Payload stored on every edge of the call graph.
#[derive(Debug, Clone)]
pub struct CallEdge {
    /// Callee text exactly as written at the call site (e.g. `"env::var"`).
    ///
    /// This is the raw string kept for diagnostics — it is **not** the
    /// normalized name that is used when resolving the callee.
    #[allow(dead_code)] // used for future diagnostics and path display
    pub call_site: String,
}
|
||||
|
||||
/// A callee that could not be resolved to any known function definition.
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)] // fields used for future diagnostics reporting
|
||||
pub struct UnresolvedCallee {
|
||||
pub caller: FuncKey,
|
||||
pub callee_name: String,
|
||||
}
|
||||
|
||||
/// A callee that matched multiple function definitions — ambiguous.
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)] // fields used for future diagnostics reporting
|
||||
pub struct AmbiguousCallee {
|
||||
pub caller: FuncKey,
|
||||
pub callee_name: String,
|
||||
pub candidates: Vec<FuncKey>,
|
||||
}
|
||||
|
||||
/// The whole-program call graph.
|
||||
///
|
||||
/// Nodes are [`FuncKey`]s (one per function definition across all files).
|
||||
/// Edges represent call-site relationships resolved after pass 1.
|
||||
pub struct CallGraph {
|
||||
pub graph: DiGraph<FuncKey, CallEdge>,
|
||||
/// `FuncKey → NodeIndex` for quick lookup.
|
||||
#[allow(dead_code)] // used for future topo-ordered analysis and call-graph queries
|
||||
pub index: HashMap<FuncKey, NodeIndex>,
|
||||
/// Callee strings that could not be resolved to any [`FuncKey`].
|
||||
pub unresolved_not_found: Vec<UnresolvedCallee>,
|
||||
/// Callee strings that matched multiple candidates.
|
||||
pub unresolved_ambiguous: Vec<AmbiguousCallee>,
|
||||
}
|
||||
|
||||
/// Result of SCC / topological analysis on the call graph.
|
||||
pub struct CallGraphAnalysis {
|
||||
/// Strongly connected components.
|
||||
pub sccs: Vec<Vec<NodeIndex>>,
|
||||
/// Maps each `NodeIndex` to its SCC index in [`sccs`].
|
||||
#[allow(dead_code)] // used for future topo-ordered taint propagation
|
||||
pub node_to_scc: HashMap<NodeIndex, usize>,
|
||||
/// SCC indices in **callee-first** (leaves-first) order.
|
||||
///
|
||||
/// Functions with no callees appear first; callers appear later.
|
||||
/// Suitable for bottom-up taint propagation.
|
||||
#[allow(dead_code)] // used for future topo-ordered taint propagation
|
||||
pub topo_scc_callee_first: Vec<usize>,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Callee-name normalization
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Extract the last segment of a qualified callee name for resolution.
///
/// ```text
/// "env::var"              → "var"
/// "std::process::Command" → "Command"
/// "obj.method"            → "method"
/// "pkg.mod.func"          → "func"
/// "foo"                   → "foo"   (unchanged)
/// ""                      → ""      (edge case)
/// ```
///
/// The returned slice borrows from `raw`; nothing is allocated. A name that
/// ends in a separator (e.g. `"foo."`) yields the empty string.
///
/// The original raw text is preserved on [`CallEdge::call_site`] for
/// diagnostics; this function only produces the lookup key.
pub(crate) fn normalize_callee_name(raw: &str) -> &str {
    // Strip Rust-style `::` qualification first, keeping what follows the
    // last `::`. `rsplit_once` returns `None` when there is no separator, in
    // which case the input is used as-is.
    let unqualified = raw.rsplit_once("::").map_or(raw, |(_, tail)| tail);
    // Then strip dotted receivers / packages (method calls, Python/JS paths).
    unqualified
        .rsplit_once('.')
        .map_or(unqualified, |(_, tail)| tail)
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Call-graph construction
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Build the whole-program call graph from merged summaries.
|
||||
///
|
||||
/// Resolution mirrors `GlobalSummaries::resolve_callee_key`:
|
||||
/// 1. Normalize callee name (last segment after `::` or `.`)
|
||||
/// 2. Same-language, arity-filtered, namespace-disambiguated lookup
|
||||
/// 3. Interop edges (explicit cross-language bridges)
|
||||
///
|
||||
/// Unresolved and ambiguous callees are recorded for diagnostics but
|
||||
/// do **not** create edges.
|
||||
pub fn build_call_graph(summaries: &GlobalSummaries, interop_edges: &[InteropEdge]) -> CallGraph {
|
||||
let mut graph = DiGraph::new();
|
||||
let mut index = HashMap::new();
|
||||
|
||||
// 1. Create one node per FuncKey.
|
||||
for (key, _) in summaries.iter() {
|
||||
let idx = graph.add_node(key.clone());
|
||||
index.insert(key.clone(), idx);
|
||||
}
|
||||
|
||||
let mut unresolved_not_found = Vec::new();
|
||||
let mut unresolved_ambiguous = Vec::new();
|
||||
|
||||
// 2. Resolve callees and add edges.
|
||||
for (caller_key, summary) in summaries.iter() {
|
||||
let caller_node = index[caller_key];
|
||||
|
||||
for raw_callee in &summary.callees {
|
||||
let normalized = normalize_callee_name(raw_callee);
|
||||
|
||||
match summaries.resolve_callee_key(
|
||||
normalized,
|
||||
caller_key.lang,
|
||||
&caller_key.namespace,
|
||||
None,
|
||||
) {
|
||||
CalleeResolution::Resolved(target_key) => {
|
||||
if let Some(&target_node) = index.get(&target_key) {
|
||||
graph.add_edge(
|
||||
caller_node,
|
||||
target_node,
|
||||
CallEdge {
|
||||
call_site: raw_callee.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
CalleeResolution::NotFound => {
|
||||
// Try interop edges before recording as not-found.
|
||||
if let Some(target_key) =
|
||||
resolve_via_interop(raw_callee, caller_key, interop_edges)
|
||||
&& let Some(&target_node) = index.get(&target_key)
|
||||
{
|
||||
graph.add_edge(
|
||||
caller_node,
|
||||
target_node,
|
||||
CallEdge {
|
||||
call_site: raw_callee.clone(),
|
||||
},
|
||||
);
|
||||
continue;
|
||||
}
|
||||
unresolved_not_found.push(UnresolvedCallee {
|
||||
caller: caller_key.clone(),
|
||||
callee_name: raw_callee.clone(),
|
||||
});
|
||||
}
|
||||
CalleeResolution::Ambiguous(candidates) => {
|
||||
unresolved_ambiguous.push(AmbiguousCallee {
|
||||
caller: caller_key.clone(),
|
||||
callee_name: raw_callee.clone(),
|
||||
candidates,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CallGraph {
|
||||
graph,
|
||||
index,
|
||||
unresolved_not_found,
|
||||
unresolved_ambiguous,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check interop edges for a matching cross-language bridge.
|
||||
fn resolve_via_interop(
|
||||
raw_callee: &str,
|
||||
caller_key: &FuncKey,
|
||||
interop_edges: &[InteropEdge],
|
||||
) -> Option<FuncKey> {
|
||||
for edge in interop_edges {
|
||||
if edge.from.caller_lang == caller_key.lang
|
||||
&& edge.from.caller_namespace == caller_key.namespace
|
||||
&& edge.from.callee_symbol == raw_callee
|
||||
&& (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_key.name)
|
||||
{
|
||||
return Some(edge.to.clone());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// SCC / topological analysis
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Compute SCC decomposition and topological ordering of the call graph.
|
||||
///
|
||||
/// `petgraph::algo::tarjan_scc` returns SCCs in *reverse* topological order
|
||||
/// of the condensation DAG — i.e. leaf SCCs (no outgoing cross-SCC edges)
|
||||
/// come **first**. That is exactly the **callee-first** order suitable for
|
||||
/// bottom-up taint propagation.
|
||||
pub fn analyse(cg: &CallGraph) -> CallGraphAnalysis {
|
||||
let sccs = petgraph::algo::tarjan_scc(&cg.graph);
|
||||
|
||||
let mut node_to_scc = HashMap::with_capacity(cg.graph.node_count());
|
||||
for (scc_idx, scc) in sccs.iter().enumerate() {
|
||||
for &node in scc {
|
||||
node_to_scc.insert(node, scc_idx);
|
||||
}
|
||||
}
|
||||
|
||||
// tarjan_scc already gives callee-first ordering.
|
||||
let topo_scc_callee_first: Vec<usize> = (0..sccs.len()).collect();
|
||||
|
||||
CallGraphAnalysis {
|
||||
sccs,
|
||||
node_to_scc,
|
||||
topo_scc_callee_first,
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::interop::CallSiteKey;
|
||||
use crate::summary::{FuncSummary, merge_summaries};
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Helper to create a minimal FuncSummary.
|
||||
fn make_summary(
|
||||
name: &str,
|
||||
file_path: &str,
|
||||
lang: &str,
|
||||
param_count: usize,
|
||||
callees: Vec<&str>,
|
||||
) -> FuncSummary {
|
||||
FuncSummary {
|
||||
name: name.into(),
|
||||
file_path: file_path.into(),
|
||||
lang: lang.into(),
|
||||
param_count,
|
||||
param_names: vec![],
|
||||
source_caps: 0,
|
||||
sanitizer_caps: 0,
|
||||
sink_caps: 0,
|
||||
propagates_taint: false,
|
||||
tainted_sink_params: vec![],
|
||||
callees: callees.into_iter().map(String::from).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
// ── normalize_callee_name ────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn normalize_callee_basic() {
|
||||
assert_eq!(normalize_callee_name("env::var"), "var");
|
||||
assert_eq!(normalize_callee_name("std::process::Command"), "Command");
|
||||
assert_eq!(normalize_callee_name("obj.method"), "method");
|
||||
assert_eq!(normalize_callee_name("pkg.mod.func"), "func");
|
||||
assert_eq!(normalize_callee_name("foo"), "foo");
|
||||
assert_eq!(normalize_callee_name(""), "");
|
||||
}
|
||||
|
||||
// ── same name, different Rust modules ────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn same_name_different_rust_modules() {
|
||||
let helper_a = make_summary("helper", "src/a.rs", "rust", 0, vec![]);
|
||||
let helper_b = make_summary("helper", "src/b.rs", "rust", 0, vec![]);
|
||||
let caller = make_summary("caller", "src/a.rs", "rust", 0, vec!["helper"]);
|
||||
|
||||
let gs = merge_summaries(vec![helper_a, helper_b, caller], None);
|
||||
let cg = build_call_graph(&gs, &[]);
|
||||
|
||||
// Two helper nodes + one caller node = 3 nodes
|
||||
assert_eq!(cg.graph.node_count(), 3);
|
||||
|
||||
// Caller is in src/a.rs, so "helper" resolves to src/a.rs::helper
|
||||
let caller_key = FuncKey {
|
||||
lang: Lang::Rust,
|
||||
namespace: "src/a.rs".into(),
|
||||
name: "caller".into(),
|
||||
arity: Some(0),
|
||||
};
|
||||
let helper_a_key = FuncKey {
|
||||
lang: Lang::Rust,
|
||||
namespace: "src/a.rs".into(),
|
||||
name: "helper".into(),
|
||||
arity: Some(0),
|
||||
};
|
||||
|
||||
let caller_node = cg.index[&caller_key];
|
||||
let helper_a_node = cg.index[&helper_a_key];
|
||||
|
||||
// Exactly one edge: caller → helper_a
|
||||
let edges: Vec<_> = cg
|
||||
.graph
|
||||
.edges(caller_node)
|
||||
.filter(|e| e.target() == helper_a_node)
|
||||
.collect();
|
||||
assert_eq!(edges.len(), 1);
|
||||
assert!(cg.unresolved_not_found.is_empty());
|
||||
assert!(cg.unresolved_ambiguous.is_empty());
|
||||
}
|
||||
|
||||
// ── same name, Python vs Rust ────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn same_name_python_and_rust() {
|
||||
let py_foo = make_summary("foo", "handler.py", "python", 0, vec![]);
|
||||
let rs_foo = make_summary("foo", "handler.rs", "rust", 0, vec![]);
|
||||
// Python caller calls "foo" — should only see the Python one
|
||||
let py_caller = make_summary("main", "app.py", "python", 0, vec!["foo"]);
|
||||
|
||||
let gs = merge_summaries(vec![py_foo, rs_foo, py_caller], None);
|
||||
let cg = build_call_graph(&gs, &[]);
|
||||
|
||||
assert_eq!(cg.graph.node_count(), 3);
|
||||
|
||||
let py_foo_key = FuncKey {
|
||||
lang: Lang::Python,
|
||||
namespace: "handler.py".into(),
|
||||
name: "foo".into(),
|
||||
arity: Some(0),
|
||||
};
|
||||
let caller_key = FuncKey {
|
||||
lang: Lang::Python,
|
||||
namespace: "app.py".into(),
|
||||
name: "main".into(),
|
||||
arity: Some(0),
|
||||
};
|
||||
|
||||
let caller_node = cg.index[&caller_key];
|
||||
let py_foo_node = cg.index[&py_foo_key];
|
||||
|
||||
// Edge goes to Python foo, not Rust foo
|
||||
let edges: Vec<_> = cg.graph.edges(caller_node).collect();
|
||||
assert_eq!(edges.len(), 1);
|
||||
assert_eq!(edges[0].target(), py_foo_node);
|
||||
}
|
||||
|
||||
// ── arity differences → separate nodes ───────────────────────────────
|
||||
|
||||
/// Two functions that differ only in arity must become two distinct graph
/// nodes, each reachable through its own `FuncKey`.
#[test]
fn arity_differences_separate_nodes() {
    let overloads = vec![
        make_summary("helper", "lib.rs", "rust", 1, vec![]),
        make_summary("helper", "lib.rs", "rust", 2, vec![]),
    ];
    let gs = merge_summaries(overloads, None);
    let cg = build_call_graph(&gs, &[]);

    // Two separate nodes (different arity → different FuncKey)
    assert_eq!(cg.graph.node_count(), 2);

    for arity in [1, 2] {
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "helper".into(),
            arity: Some(arity),
        };
        assert!(cg.index.contains_key(&key));
    }
}
|
||||
|
||||
// ── recursive SCC detection ──────────────────────────────────────────
|
||||
|
||||
/// Mutually recursive functions must end up in the same strongly connected
/// component, and that component must contain exactly the two of them.
#[test]
fn recursive_scc_detection() {
    // `a` calls `b` and `b` calls `a`.
    let gs = merge_summaries(
        vec![
            make_summary("a", "lib.rs", "rust", 0, vec!["b"]),
            make_summary("b", "lib.rs", "rust", 0, vec!["a"]),
        ],
        None,
    );
    let cg = build_call_graph(&gs, &[]);

    assert_eq!(cg.graph.edge_count(), 2); // a→b and b→a

    let analysis = analyse(&cg);

    // Look up the SCC index of the zero-arity Rust function `name` in lib.rs.
    let scc_of = |name: &str| {
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: name.into(),
            arity: Some(0),
        };
        analysis.node_to_scc[&cg.index[&key]]
    };

    // Both nodes should be in the same SCC
    let (scc_a, scc_b) = (scc_of("a"), scc_of("b"));
    assert_eq!(scc_a, scc_b);
    assert_eq!(analysis.sccs[scc_a].len(), 2);
}
|
||||
|
||||
// ── unresolved callee → recorded as not found ────────────────────────
|
||||
|
||||
/// A callee name with no matching summary produces no edge and is recorded
/// in the "not found" list (and not in the "ambiguous" list).
#[test]
fn unresolved_callee_recorded_as_not_found() {
    let summaries = vec![make_summary(
        "caller",
        "lib.rs",
        "rust",
        0,
        vec!["nonexistent"],
    )];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let not_found = &cg.unresolved_not_found;

    assert_eq!(cg.graph.edge_count(), 0);
    assert_eq!(not_found.len(), 1);
    assert_eq!(not_found[0].callee_name, "nonexistent");
    assert!(cg.unresolved_ambiguous.is_empty());
}
|
||||
|
||||
// ── ambiguous callee → recorded as ambiguous ─────────────────────────
|
||||
|
||||
/// When two equally good candidates exist for a callee, no edge is added and
/// the call is recorded as ambiguous together with both candidates.
#[test]
fn ambiguous_callee_recorded() {
    let summaries = vec![
        // Two "helper" functions in different namespaces.
        make_summary("helper", "a.rs", "rust", 0, vec![]),
        make_summary("helper", "b.rs", "rust", 0, vec![]),
        // Caller is in a THIRD namespace, so neither is preferred.
        make_summary("caller", "c.rs", "rust", 0, vec!["helper"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let ambiguous = &cg.unresolved_ambiguous;

    assert_eq!(cg.graph.edge_count(), 0); // no edge — ambiguous
    assert!(cg.unresolved_not_found.is_empty());
    assert_eq!(ambiguous.len(), 1);
    assert_eq!(ambiguous[0].callee_name, "helper");
    assert_eq!(ambiguous[0].candidates.len(), 2);
}
|
||||
|
||||
// ── diamond topo order (callee-first) ────────────────────────────────
|
||||
|
||||
/// In a diamond-shaped call graph the callee-first topological order must
/// place every callee's SCC before the SCC of each of its callers.
#[test]
fn diamond_topo_callee_first() {
    // A → B, A → C, B → D, C → D
    let summaries = vec![
        make_summary("a", "lib.rs", "rust", 0, vec!["b", "c"]),
        make_summary("b", "lib.rs", "rust", 0, vec!["d"]),
        make_summary("c", "lib.rs", "rust", 0, vec!["d"]),
        make_summary("d", "lib.rs", "rust", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    assert_eq!(cg.graph.node_count(), 4);

    let analysis = analyse(&cg);

    // Position of `name`'s SCC within the callee-first ordering.
    let topo_pos = |name: &str| {
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: name.into(),
            arity: Some(0),
        };
        let scc = analysis.node_to_scc[&cg.index[&key]];
        analysis
            .topo_scc_callee_first
            .iter()
            .position(|&s| s == scc)
            .unwrap()
    };

    // D (leaf) must come before B and C, which must come before A (root).
    for (callee, caller) in [("d", "b"), ("d", "c"), ("b", "a"), ("c", "a")] {
        assert!(topo_pos(callee) < topo_pos(caller));
    }
}
|
||||
|
||||
// ── interop edge resolution ──────────────────────────────────────────
|
||||
|
||||
/// An explicit interop edge lets a Python caller resolve a callee that lives
/// in a JavaScript file, producing exactly one edge and no "not found" entry.
#[test]
fn interop_edge_resolution() {
    // Key of the JavaScript target; built on demand so it can be used both
    // inside the interop edge and for the index lookup afterwards.
    let js_key = || FuncKey {
        lang: Lang::JavaScript,
        namespace: "util.js".into(),
        name: "js_func".into(),
        arity: Some(1),
    };

    let summaries = vec![
        make_summary("process", "handler.py", "python", 0, vec!["js_func"]),
        make_summary("js_func", "util.js", "javascript", 1, vec![]),
    ];
    let gs = merge_summaries(summaries, None);

    let call_site = CallSiteKey {
        caller_lang: Lang::Python,
        caller_namespace: "handler.py".into(),
        caller_func: String::new(), // wildcard
        callee_symbol: "js_func".into(),
        ordinal: 0,
    };
    let interop = vec![InteropEdge {
        from: call_site,
        to: js_key(),
        arg_map: vec![],
        ret_taints: false,
    }];

    let cg = build_call_graph(&gs, &interop);

    let caller_node = cg.index[&FuncKey {
        lang: Lang::Python,
        namespace: "handler.py".into(),
        name: "process".into(),
        arity: Some(0),
    }];
    let target_node = cg.index[&js_key()];

    // Count the caller's outgoing edges that land on the JavaScript target.
    let hits = cg
        .graph
        .edges(caller_node)
        .filter(|e| e.target() == target_node)
        .count();
    assert_eq!(hits, 1);
    assert!(cg.unresolved_not_found.is_empty());
}
|
||||
|
||||
// ── namespace normalization consistency ───────────────────────────────
|
||||
|
||||
/// `FuncSummary::func_key` with a scan root must yield the same namespace
/// string that `normalize_namespace` produces for the summary's file path.
#[test]
fn namespace_normalization_consistency() {
    let root = "/home/user/proj";

    let summary = FuncSummary {
        name: "my_func".into(),
        file_path: "/home/user/proj/src/lib.rs".into(),
        lang: "rust".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
    };

    let actual_ns = summary.func_key(Some(root)).namespace;

    // The namespace in the key must be the same as what normalize_namespace produces
    let expected_ns = crate::symbol::normalize_namespace(&summary.file_path, Some(root));
    assert_eq!(actual_ns, expected_ns);
    assert_eq!(actual_ns, "src/lib.rs");
}
|
||||
|
||||
// ── raw call_site preserved on edge ──────────────────────────────────
|
||||
|
||||
/// The edge weight must keep the raw callee text from the call site, even
/// when resolution matched on a shorter, normalized name.
#[test]
fn raw_call_site_preserved_on_edge() {
    // Callee "env::var" normalizes to "var" for resolution, but
    // the edge should retain the original raw text.
    let summaries = vec![
        make_summary("var", "util.rs", "rust", 0, vec![]),
        make_summary("main", "util.rs", "rust", 0, vec!["env::var"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let caller_node = cg.index[&FuncKey {
        lang: Lang::Rust,
        namespace: "util.rs".into(),
        name: "main".into(),
        arity: Some(0),
    }];

    let outgoing: Vec<_> = cg.graph.edges(caller_node).collect();
    assert_eq!(outgoing.len(), 1);
    // Raw call_site preserved, not the normalized "var"
    assert_eq!(outgoing[0].weight().call_site, "env::var");
}
|
||||
}
|
||||
417
src/cfg.rs
417
src/cfg.rs
|
|
@ -32,6 +32,9 @@ pub enum EdgeKind {
|
|||
Back, // back‑edge that closes a loop
|
||||
}
|
||||
|
||||
/// Maximum number of identifiers to store from a condition expression.
|
||||
const MAX_COND_VARS: usize = 8;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodeInfo {
|
||||
pub kind: StmtKind,
|
||||
|
|
@ -44,6 +47,12 @@ pub struct NodeInfo {
|
|||
pub enclosing_func: Option<String>,
|
||||
/// Per-function call ordinal (0-based, only meaningful for Call nodes).
|
||||
pub call_ordinal: u32,
|
||||
/// For If nodes: raw condition text (truncated to 128 chars). None for non-If nodes.
|
||||
pub condition_text: Option<String>,
|
||||
/// For If nodes: identifiers referenced in the condition (sorted, deduped, max 8).
|
||||
pub condition_vars: Vec<String>,
|
||||
/// For If nodes: whether the condition has a leading negation (`!` / `not`).
|
||||
pub condition_negated: bool,
|
||||
}
|
||||
|
||||
/// Intra‑file function summary with graph‑local node indices.
|
||||
|
|
@ -122,6 +131,7 @@ fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<Strin
|
|||
.child_by_field_name("function")
|
||||
.or_else(|| c.child_by_field_name("method"))
|
||||
.or_else(|| c.child_by_field_name("name"))
|
||||
.or_else(|| c.child_by_field_name("type"))
|
||||
.and_then(|f| text_of(f, code)),
|
||||
Kind::CallMethod => {
|
||||
let func = c
|
||||
|
|
@ -155,6 +165,65 @@ fn first_call_ident<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<Strin
|
|||
None
|
||||
}
|
||||
|
||||
/// Search recursively for any nested call whose identifier classifies as a label.
|
||||
/// Used for cases like `str(eval(expr))` where `str` doesn't match but `eval` does.
|
||||
fn find_classifiable_inner_call<'a>(
|
||||
n: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
extra: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> Option<(String, DataLabel)> {
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
|
||||
let ident = match lookup(lang, c.kind()) {
|
||||
Kind::CallFn => c
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| c.child_by_field_name("method"))
|
||||
.or_else(|| c.child_by_field_name("name"))
|
||||
.or_else(|| c.child_by_field_name("type"))
|
||||
.and_then(|f| text_of(f, code)),
|
||||
Kind::CallMethod => {
|
||||
let func = c
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| c.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code));
|
||||
let recv = c
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| c.child_by_field_name("receiver"))
|
||||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
Kind::CallMacro => c
|
||||
.child_by_field_name("macro")
|
||||
.and_then(|f| text_of(f, code)),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(ref id) = ident
|
||||
&& let Some(lbl) = classify(lang, id, extra)
|
||||
{
|
||||
return Some((id.clone(), lbl));
|
||||
}
|
||||
// Recurse into arguments of this call
|
||||
if let Some(found) = find_classifiable_inner_call(c, lang, code, extra) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if let Some(found) = find_classifiable_inner_call(c, lang, code, extra) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Build the dot-joined text of a member_expression / attribute / selector_expression.
|
||||
/// E.g. for `process.env.CMD` this returns `"process.env.CMD"`.
|
||||
fn member_expr_text(n: Node, code: &[u8]) -> Option<String> {
|
||||
|
|
@ -209,6 +278,25 @@ fn first_member_label(
|
|||
}
|
||||
}
|
||||
}
|
||||
// PHP/Python/Ruby subscript access: `$_GET['cmd']`, `os.environ['KEY']`, `params[:cmd]`
|
||||
// Try to classify the object (before the `[`) as a source.
|
||||
"subscript_expression" | "subscript" | "element_reference" => {
|
||||
if let Some(obj) = n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("value"))
|
||||
.or_else(|| n.child(0))
|
||||
{
|
||||
if let Some(txt) = text_of(obj, code)
|
||||
&& let Some(lbl) = classify(lang, &txt, extra_labels)
|
||||
{
|
||||
return Some(lbl);
|
||||
}
|
||||
// Recurse into the object for nested member accesses
|
||||
if let Some(lbl) = first_member_label(obj, lang, code, extra_labels) {
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
let mut cursor = n.walk();
|
||||
|
|
@ -224,6 +312,11 @@ fn first_member_label(
|
|||
fn first_member_text(n: Node, code: &[u8]) -> Option<String> {
|
||||
match n.kind() {
|
||||
"member_expression" | "attribute" | "selector_expression" => member_expr_text(n, code),
|
||||
"subscript_expression" | "subscript" | "element_reference" => n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("value"))
|
||||
.or_else(|| n.child(0))
|
||||
.and_then(|obj| text_of(obj, code)),
|
||||
_ => {
|
||||
let mut cursor = n.walk();
|
||||
for child in n.children(&mut cursor) {
|
||||
|
|
@ -237,6 +330,42 @@ fn first_member_text(n: Node, code: &[u8]) -> Option<String> {
|
|||
}
|
||||
|
||||
/// Collect function-expression nodes nested inside a call's arguments.
///
/// This finds anonymous functions / arrow functions / closures that are
/// passed as arguments to a call and should be analysed as separate
/// function scopes. Only the outermost function nodes on each path are
/// collected (not functions nested inside other functions — those get
/// handled when the outer function is recursed into).
fn collect_nested_function_nodes<'a>(n: Node<'a>, lang: &str) -> Vec<Node<'a>> {
    let mut funcs = Vec::new();
    collect_nested_functions_rec(n, lang, &mut funcs, false);
    funcs
}

/// Recursive worker for [`collect_nested_function_nodes`].
///
/// Walks the subtree rooted at `n` and appends every function node it
/// reaches to `out`, without descending into a function node once found.
/// When `inside_function` is `true`, function nodes are skipped instead of
/// collected; the flag is passed unchanged to every recursive call.
fn collect_nested_functions_rec<'a>(
    n: Node<'a>,
    lang: &str,
    out: &mut Vec<Node<'a>>,
    inside_function: bool,
) {
    // Only treat as a function if it's a real function node (has children),
    // not a keyword token like `function` in JS which shares the same kind name.
    if lookup(lang, n.kind()) == Kind::Function && n.child_count() > 0 {
        if !inside_function {
            out.push(n);
        }
        // Either way, stop here: nested functions of nested functions are
        // not this walk's concern.
        return;
    }

    let mut cursor = n.walk();
    for child in n.children(&mut cursor) {
        collect_nested_functions_rec(child, lang, out, inside_function);
    }
}

/// Check whether any descendant of `n` is a call expression.
fn has_call_descendant(n: Node, lang: &str) -> bool {
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
|
|
@ -361,6 +490,36 @@ fn def_use(ast: Node, lang: &str, code: &[u8]) -> (Option<String>, Vec<String>)
|
|||
(defs, uses)
|
||||
}
|
||||
|
||||
// if‑let / while‑let — the `let_condition` binds a variable from
|
||||
// the value expression. E.g. `if let Ok(cmd) = env::var("CMD")`
|
||||
// defines `cmd` and uses `env`, `var`, `CMD`.
|
||||
Kind::If | Kind::While => {
|
||||
let cond = ast.child_by_field_name("condition");
|
||||
if let Some(c) = cond
|
||||
&& c.kind() == "let_condition"
|
||||
{
|
||||
let mut defs = None;
|
||||
let mut uses = Vec::new();
|
||||
|
||||
if let Some(pat) = c.child_by_field_name("pattern") {
|
||||
let mut tmp = Vec::<String>::new();
|
||||
collect_idents(pat, code, &mut tmp);
|
||||
// The first plain identifier in the pattern is the binding.
|
||||
// Skip type identifiers (e.g. "Ok" in Ok(cmd)) — take the
|
||||
// last ident which is the inner binding name.
|
||||
defs = tmp.into_iter().last();
|
||||
}
|
||||
if let Some(val) = c.child_by_field_name("value") {
|
||||
collect_idents(val, code, &mut uses);
|
||||
}
|
||||
return (defs, uses);
|
||||
}
|
||||
|
||||
let mut uses = Vec::new();
|
||||
collect_idents(ast, code, &mut uses);
|
||||
(None, uses)
|
||||
}
|
||||
|
||||
// everything else – no definition, but may read vars
|
||||
_ => {
|
||||
let mut uses = Vec::new();
|
||||
|
|
@ -370,6 +529,109 @@ fn def_use(ast: Node, lang: &str, code: &[u8]) -> (Option<String>, Vec<String>)
|
|||
}
|
||||
}
|
||||
|
||||
/// Extract raw condition metadata from an If AST node.
|
||||
///
|
||||
/// Returns `(condition_text, condition_vars, condition_negated)`.
|
||||
/// The condition subtree is located via `child_by_field_name("condition")`
|
||||
/// for most languages, with a positional fallback for Rust `if_expression`.
|
||||
///
|
||||
/// Negation is detected by checking for a leading unary `!` operator or
|
||||
/// `not` keyword. Variables are sorted, deduped, and capped at
|
||||
/// [`MAX_COND_VARS`].
|
||||
fn extract_condition_raw<'a>(
|
||||
ast: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> (Option<String>, Vec<String>, bool) {
|
||||
// 1. Find the condition subtree.
|
||||
let cond_node = ast.child_by_field_name("condition").or_else(|| {
|
||||
// Rust `if_expression` uses positional children: the condition is
|
||||
// the first child that is not a keyword, block, or `let` pattern.
|
||||
let mut cursor = ast.walk();
|
||||
ast.children(&mut cursor).find(|c| {
|
||||
let k = c.kind();
|
||||
!matches!(lookup(lang, k), Kind::Block | Kind::Trivia)
|
||||
&& k != "if"
|
||||
&& k != "else"
|
||||
&& k != "let"
|
||||
&& k != "{"
|
||||
&& k != "}"
|
||||
&& k != "("
|
||||
&& k != ")"
|
||||
})
|
||||
});
|
||||
|
||||
let Some(cond) = cond_node else {
|
||||
return (None, Vec::new(), false);
|
||||
};
|
||||
|
||||
// 2. Detect leading negation (`!expr`, `not expr`, Ruby `unless`).
|
||||
let (inner, negated) = detect_negation(cond, ast, lang);
|
||||
|
||||
// 3. Collect identifiers from the (inner) condition subtree.
|
||||
let mut vars = Vec::new();
|
||||
collect_idents(inner, code, &mut vars);
|
||||
vars.sort();
|
||||
vars.dedup();
|
||||
vars.truncate(MAX_COND_VARS);
|
||||
|
||||
// 4. Extract text, truncated.
|
||||
let text = text_of(cond, code).map(|t| {
|
||||
if t.len() > 128 {
|
||||
t[..128].to_string()
|
||||
} else {
|
||||
t
|
||||
}
|
||||
});
|
||||
|
||||
(text, vars, negated)
|
||||
}
|
||||
|
||||
/// Detect leading negation and return the inner expression.
|
||||
///
|
||||
/// Handles:
|
||||
/// - `!expr` (unary_expression / prefix_unary_expression with `!` operator)
|
||||
/// - `not expr` (Python `not_operator`, Ruby)
|
||||
/// - Ruby `unless` (the whole If node kind is `unless`)
|
||||
fn detect_negation<'a>(cond: Node<'a>, if_ast: Node<'a>, _lang: &str) -> (Node<'a>, bool) {
|
||||
// Ruby `unless` is mapped to Kind::If but is semantically negated.
|
||||
if if_ast.kind() == "unless" {
|
||||
return (cond, true);
|
||||
}
|
||||
|
||||
// `!expr` appears as unary_expression, not_operator, or prefix_unary_expression
|
||||
// with a `!` or `not` operator child.
|
||||
let is_negation_wrapper = matches!(
|
||||
cond.kind(),
|
||||
"unary_expression" | "not_operator" | "prefix_unary_expression" | "unary_not"
|
||||
);
|
||||
|
||||
if is_negation_wrapper {
|
||||
// Check if the first child is a `!` or `not` operator.
|
||||
let has_not = cond
|
||||
.child(0)
|
||||
.is_some_and(|c| c.kind() == "!" || c.kind() == "not");
|
||||
|
||||
if has_not {
|
||||
// Return the operand (inner expression after the `!` / `not`).
|
||||
let inner = cond
|
||||
.child_by_field_name("argument")
|
||||
.or_else(|| cond.child_by_field_name("operand"))
|
||||
.or_else(|| {
|
||||
// Last non-operator child.
|
||||
let mut cursor = cond.walk();
|
||||
cond.children(&mut cursor)
|
||||
.filter(|c| c.kind() != "!" && c.kind() != "not")
|
||||
.last()
|
||||
})
|
||||
.unwrap_or(cond);
|
||||
return (inner, true);
|
||||
}
|
||||
}
|
||||
|
||||
(cond, false)
|
||||
}
|
||||
|
||||
/// Create a node in one short borrow and optionally attach a taint label.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn push_node<'a>(
|
||||
|
|
@ -391,6 +653,7 @@ fn push_node<'a>(
|
|||
.child_by_field_name("function")
|
||||
.or_else(|| ast.child_by_field_name("method"))
|
||||
.or_else(|| ast.child_by_field_name("name"))
|
||||
.or_else(|| ast.child_by_field_name("type"))
|
||||
.and_then(|n| text_of(n, code))
|
||||
.unwrap_or_default(),
|
||||
|
||||
|
|
@ -426,7 +689,7 @@ fn push_node<'a>(
|
|||
// the whole line.
|
||||
if matches!(
|
||||
lookup(lang, ast.kind()),
|
||||
Kind::CallWrapper | Kind::Assignment
|
||||
Kind::CallWrapper | Kind::Assignment | Kind::Return
|
||||
) && let Some(inner) = first_call_ident(ast, lang, code)
|
||||
{
|
||||
text = inner;
|
||||
|
|
@ -437,6 +700,20 @@ fn push_node<'a>(
|
|||
let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
|
||||
let mut label = classify(lang, &text, extra);
|
||||
|
||||
// If the outermost call didn't classify, try inner/nested calls.
|
||||
// E.g. `str(eval(expr))` — `str` is not a sink, but `eval` is.
|
||||
if label.is_none()
|
||||
&& matches!(
|
||||
lookup(lang, ast.kind()),
|
||||
Kind::CallWrapper | Kind::Assignment | Kind::Return
|
||||
)
|
||||
&& let Some((inner_text, inner_label)) =
|
||||
find_classifiable_inner_call(ast, lang, code, extra)
|
||||
{
|
||||
label = Some(inner_label);
|
||||
text = inner_text;
|
||||
}
|
||||
|
||||
// For assignments like `element.innerHTML = value`, the inner-call heuristic
|
||||
// above may have overridden `text` with a call on the RHS (e.g. getElementById).
|
||||
// If that didn't produce a label, check the LHS property name — it may be a
|
||||
|
|
@ -493,18 +770,49 @@ fn push_node<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// For `if let` / `while let` patterns: try to classify the value expression
|
||||
// in the let-condition as a source/sink. E.g. `if let Ok(cmd) = env::var("CMD")`
|
||||
// should recognise `env::var` as a taint source and label this node accordingly.
|
||||
if label.is_none()
|
||||
&& matches!(lookup(lang, ast.kind()), Kind::If | Kind::While)
|
||||
&& let Some(cond) = ast.child_by_field_name("condition")
|
||||
&& cond.kind() == "let_condition"
|
||||
&& let Some(val) = cond.child_by_field_name("value")
|
||||
{
|
||||
if let Some(ident) = first_call_ident(val, lang, code)
|
||||
&& let Some(l) = classify(lang, &ident, extra)
|
||||
{
|
||||
label = Some(l);
|
||||
text = ident;
|
||||
}
|
||||
if label.is_none()
|
||||
&& let Some(ident_text) = text_of(val, code)
|
||||
&& let Some(l) = classify(lang, &ident_text, extra)
|
||||
{
|
||||
label = Some(l);
|
||||
text = ident_text;
|
||||
}
|
||||
}
|
||||
|
||||
let span = (ast.start_byte(), ast.end_byte());
|
||||
|
||||
/* ── 3. GRAPH INSERTION + DEBUG ──────────────────────────────────── */
|
||||
|
||||
let (defines, uses) = def_use(ast, lang, code);
|
||||
|
||||
let callee = if kind == StmtKind::Call {
|
||||
let callee = if kind == StmtKind::Call || label.is_some() {
|
||||
Some(text.clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Extract condition metadata for If nodes.
|
||||
let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If {
|
||||
extract_condition_raw(ast, lang, code)
|
||||
} else {
|
||||
(None, Vec::new(), false)
|
||||
};
|
||||
|
||||
let idx = g.add_node(NodeInfo {
|
||||
kind,
|
||||
span,
|
||||
|
|
@ -514,6 +822,9 @@ fn push_node<'a>(
|
|||
callee,
|
||||
enclosing_func: enclosing_func.map(|s| s.to_string()),
|
||||
call_ordinal,
|
||||
condition_text,
|
||||
condition_vars,
|
||||
condition_negated,
|
||||
});
|
||||
|
||||
debug!(
|
||||
|
|
@ -717,19 +1028,27 @@ fn build_sub<'a>(
|
|||
}
|
||||
exits
|
||||
} else {
|
||||
// No explicit else → if the then-branch falls through
|
||||
// (non-empty exits), the false branch merges with those exits.
|
||||
// If the then-branch terminates (break/return/continue →
|
||||
// empty exits), the false branch flows from the condition
|
||||
// to whatever comes next.
|
||||
if then_exits.is_empty() {
|
||||
vec![cond]
|
||||
} else {
|
||||
if let Some(&first) = then_exits.first() {
|
||||
connect_all(g, &[cond], first, EdgeKind::False);
|
||||
}
|
||||
then_exits.clone()
|
||||
}
|
||||
// No explicit else → create a synthetic pass-through node
|
||||
// for the false path. This avoids routing the False edge
|
||||
// to a then-block exit (which would make it appear that the
|
||||
// false path goes *through* the then-block) and gives
|
||||
// path-sensitive analysis an explicit False edge to record
|
||||
// predicates on.
|
||||
let pass = g.add_node(NodeInfo {
|
||||
kind: StmtKind::Seq,
|
||||
span: (ast.end_byte(), ast.end_byte()),
|
||||
label: None,
|
||||
defines: None,
|
||||
uses: Vec::new(),
|
||||
callee: None,
|
||||
enclosing_func: enclosing_func.map(|s| s.to_string()),
|
||||
call_ordinal: 0,
|
||||
condition_text: None,
|
||||
condition_vars: Vec::new(),
|
||||
condition_negated: false,
|
||||
});
|
||||
connect_all(g, &[cond], pass, EdgeKind::False);
|
||||
vec![pass]
|
||||
};
|
||||
|
||||
// Frontier = union of both branches
|
||||
|
|
@ -995,7 +1314,7 @@ fn build_sub<'a>(
|
|||
collect_idents(n, code, &mut tmp);
|
||||
tmp.into_iter().next()
|
||||
})
|
||||
.unwrap_or_else(|| "<anon>".to_string());
|
||||
.unwrap_or_else(|| format!("<anon@{}>", ast.start_byte()));
|
||||
let entry_idx = push_node(
|
||||
g,
|
||||
StmtKind::Seq,
|
||||
|
|
@ -1016,7 +1335,20 @@ fn build_sub<'a>(
|
|||
// Snapshot the current node count so we can iterate only over nodes
|
||||
// created within this function (avoids O(N²) scan of the full graph).
|
||||
let fn_first_node: NodeIndex = NodeIndex::new(g.node_count());
|
||||
let body = ast.child_by_field_name("body").expect("fn w/o body");
|
||||
let body = ast.child_by_field_name("body").unwrap_or_else(|| {
|
||||
// Some function expressions (e.g. JS anonymous `function(…) { … }`)
|
||||
// don't have a named "body" field — find the first block child.
|
||||
let mut c = ast.walk();
|
||||
ast.children(&mut c)
|
||||
.find(|n| matches!(lookup(lang, n.kind()), Kind::Block | Kind::SourceFile))
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"fn w/o body: kind={} text='{}'",
|
||||
ast.kind(),
|
||||
text_of(ast, code).unwrap_or_default()
|
||||
)
|
||||
})
|
||||
});
|
||||
let mut fn_call_ordinal: u32 = 0;
|
||||
let mut fn_breaks = Vec::new();
|
||||
let mut fn_continues = Vec::new();
|
||||
|
|
@ -1191,6 +1523,9 @@ fn build_sub<'a>(
|
|||
callee: None,
|
||||
enclosing_func: Some(fn_name.clone()),
|
||||
call_ordinal: 0,
|
||||
condition_text: None,
|
||||
condition_vars: Vec::new(),
|
||||
condition_negated: false,
|
||||
});
|
||||
// Wire body exits (fall-through) to the exit node.
|
||||
for &b in &body_exits {
|
||||
|
|
@ -1300,6 +1635,28 @@ fn build_sub<'a>(
|
|||
{
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Recurse into any function expressions nested in arguments
|
||||
// (e.g. `app.get('/path', function(req, res) { ... })`)
|
||||
// so that they get proper function summaries.
|
||||
let nested = collect_nested_function_nodes(ast, lang);
|
||||
for func_node in nested {
|
||||
build_sub(
|
||||
func_node,
|
||||
&[node],
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
summaries,
|
||||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
break_targets,
|
||||
continue_targets,
|
||||
);
|
||||
}
|
||||
|
||||
vec![node]
|
||||
}
|
||||
|
||||
|
|
@ -1326,6 +1683,26 @@ fn build_sub<'a>(
|
|||
{
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Recurse into any function expressions nested in arguments
|
||||
let nested = collect_nested_function_nodes(ast, lang);
|
||||
for func_node in nested {
|
||||
build_sub(
|
||||
func_node,
|
||||
&[n],
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
summaries,
|
||||
file_path,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
break_targets,
|
||||
continue_targets,
|
||||
);
|
||||
}
|
||||
|
||||
vec![n]
|
||||
}
|
||||
|
||||
|
|
@ -1412,6 +1789,9 @@ pub(crate) fn build_cfg<'a>(
|
|||
callee: None,
|
||||
enclosing_func: None,
|
||||
call_ordinal: 0,
|
||||
condition_text: None,
|
||||
condition_vars: Vec::new(),
|
||||
condition_negated: false,
|
||||
});
|
||||
let exit = g.add_node(NodeInfo {
|
||||
kind: StmtKind::Exit,
|
||||
|
|
@ -1422,6 +1802,9 @@ pub(crate) fn build_cfg<'a>(
|
|||
callee: None,
|
||||
enclosing_func: None,
|
||||
call_ordinal: 0,
|
||||
condition_text: None,
|
||||
condition_vars: Vec::new(),
|
||||
condition_negated: false,
|
||||
});
|
||||
|
||||
// Build the body below the synthetic ENTRY.
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ pub struct CfgFinding {
|
|||
pub severity: Severity,
|
||||
pub confidence: Confidence,
|
||||
pub span: (usize, usize),
|
||||
#[allow(dead_code)]
|
||||
pub message: String,
|
||||
pub evidence: Vec<NodeIndex>,
|
||||
pub score: Option<f64>,
|
||||
|
|
|
|||
|
|
@ -681,6 +681,8 @@ fn taint_and_unguarded_sink_deduped() {
|
|||
source: entry,
|
||||
path: vec![entry, sink_node],
|
||||
source_kind: crate::labels::SourceKind::UserInput,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
}];
|
||||
|
||||
let findings = parse_and_run_all_with_taint(
|
||||
|
|
|
|||
174
src/cli.rs
174
src/cli.rs
|
|
@ -1,4 +1,4 @@
|
|||
use clap::{Parser, Subcommand};
|
||||
use clap::{Parser, Subcommand, ValueEnum};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "nyx")]
|
||||
|
|
@ -13,10 +13,55 @@ impl Commands {
|
|||
/// Whether this command produces structured (machine-readable) output on
|
||||
/// stdout, meaning human status messages must be suppressed entirely.
|
||||
pub fn is_structured_output(&self) -> bool {
|
||||
matches!(self, Commands::Scan { format, .. } if format == "json" || format == "sarif")
|
||||
matches!(self, Commands::Scan { format, .. } if *format == OutputFormat::Json || *format == OutputFormat::Sarif)
|
||||
}
|
||||
}
|
||||
|
||||
/// Output format for scan results.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, ValueEnum, Default)]
|
||||
pub enum OutputFormat {
|
||||
#[default]
|
||||
Console,
|
||||
Json,
|
||||
Sarif,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for OutputFormat {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
OutputFormat::Console => write!(f, "console"),
|
||||
OutputFormat::Json => write!(f, "json"),
|
||||
OutputFormat::Sarif => write!(f, "sarif"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Index mode for scan operations.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, ValueEnum, Default)]
|
||||
pub enum IndexMode {
|
||||
/// Use index if available, build if missing (default)
|
||||
#[default]
|
||||
Auto,
|
||||
/// Skip indexing entirely, scan filesystem directly
|
||||
Off,
|
||||
/// Force rebuild index before scanning
|
||||
Rebuild,
|
||||
}
|
||||
|
||||
/// Analysis mode for scan operations.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, ValueEnum, Default)]
|
||||
pub enum ScanMode {
|
||||
/// Run all analyses: AST patterns + CFG + taint (default)
|
||||
#[default]
|
||||
Full,
|
||||
/// Run AST pattern queries only (no CFG/taint)
|
||||
Ast,
|
||||
/// Run CFG structural analyses + taint only (no AST patterns)
|
||||
Cfg,
|
||||
/// Alias for cfg (CFG + taint analysis)
|
||||
Taint,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
pub enum Commands {
|
||||
/// Scan project for vulnerabilities
|
||||
|
|
@ -25,35 +70,118 @@ pub enum Commands {
|
|||
#[arg(default_value = ".")]
|
||||
path: String,
|
||||
|
||||
/// Skip using/building index, scan directly
|
||||
#[arg(long)]
|
||||
no_index: bool,
|
||||
/// Index mode: auto (default), off (no index), rebuild (force rebuild)
|
||||
#[arg(long, value_enum, default_value_t = IndexMode::Auto)]
|
||||
index: IndexMode,
|
||||
|
||||
/// Force rebuild index before scanning
|
||||
#[arg(long)]
|
||||
rebuild_index: bool,
|
||||
/// Output format
|
||||
#[arg(short, long, value_enum, default_value_t = OutputFormat::Console)]
|
||||
format: OutputFormat,
|
||||
|
||||
/// Output format (console, json, sarif)
|
||||
#[arg(short, long, default_value = "")]
|
||||
format: String,
|
||||
|
||||
/// Show only high severity issues
|
||||
/// Severity filter expression: HIGH, HIGH,MEDIUM, or >=MEDIUM
|
||||
///
|
||||
/// Filters findings AFTER all severity normalization (e.g. nonprod
|
||||
/// downgrades). Only findings matching the expression are emitted.
|
||||
/// Case-insensitive. Shell-quote expressions containing ">".
|
||||
#[arg(long)]
|
||||
high_only: bool,
|
||||
severity: Option<String>,
|
||||
|
||||
#[arg(long)]
|
||||
ast_only: bool,
|
||||
/// Analysis mode: full (default), ast, cfg, taint
|
||||
#[arg(long, value_enum, default_value_t = ScanMode::Full)]
|
||||
mode: ScanMode,
|
||||
|
||||
#[arg(long)]
|
||||
cfg_only: bool,
|
||||
|
||||
#[arg(long)]
|
||||
/// Scan all targets (alias for --mode full)
|
||||
#[arg(long, hide = true)]
|
||||
all_targets: bool,
|
||||
|
||||
/// Include findings from test/vendor/build paths at original severity
|
||||
/// (by default these are downgraded)
|
||||
/// Preserve original severity for test/vendor/build paths
|
||||
///
|
||||
/// By default, findings in non-production paths are downgraded by one
|
||||
/// severity tier. This flag preserves original severity.
|
||||
#[arg(long, alias = "include-nonprod")]
|
||||
keep_nonprod_severity: bool,
|
||||
|
||||
/// Suppress all human-readable status output
|
||||
#[arg(long)]
|
||||
include_nonprod: bool,
|
||||
quiet: bool,
|
||||
|
||||
/// Exit with code 1 if any finding meets or exceeds this severity
|
||||
///
|
||||
/// Useful for CI gating. Example: --fail-on HIGH
|
||||
#[arg(long)]
|
||||
fail_on: Option<String>,
|
||||
|
||||
/// Disable attack-surface ranking (findings are sorted by exploitability by default)
|
||||
#[arg(long)]
|
||||
no_rank: bool,
|
||||
|
||||
/// Show inline-suppressed findings (dimmed, tagged [SUPPRESSED])
|
||||
#[arg(long)]
|
||||
show_suppressed: bool,
|
||||
|
||||
/// Show all findings: disables category filtering, rollups, and LOW budgets
|
||||
#[arg(long = "all")]
|
||||
show_all: bool,
|
||||
|
||||
/// Include Quality findings (excluded by default)
|
||||
#[arg(long)]
|
||||
include_quality: bool,
|
||||
|
||||
/// Maximum total LOW findings to show
|
||||
#[arg(long, default_value_t = 20)]
|
||||
max_low: u32,
|
||||
|
||||
/// Maximum LOW findings per file
|
||||
#[arg(long, default_value_t = 1)]
|
||||
max_low_per_file: u32,
|
||||
|
||||
/// Maximum LOW findings per rule
|
||||
#[arg(long, default_value_t = 10)]
|
||||
max_low_per_rule: u32,
|
||||
|
||||
/// Number of example locations in rollup findings
|
||||
#[arg(long, default_value_t = 5)]
|
||||
rollup_examples: u32,
|
||||
|
||||
/// Show all instances for a specific rule (bypasses rollup for that rule)
|
||||
#[arg(long)]
|
||||
show_instances: Option<String>,
|
||||
|
||||
/// Minimum attack-surface score to include in output
|
||||
///
|
||||
/// Findings with a rank score below this threshold are suppressed.
|
||||
/// Requires ranking to be enabled (has no effect with --no-rank).
|
||||
/// Example: --min-score 50
|
||||
#[arg(long)]
|
||||
min_score: Option<u32>,
|
||||
|
||||
/// Minimum confidence level to include in output
|
||||
///
|
||||
/// Values: low, medium, high. Findings below this level are dropped.
|
||||
/// JSON/SARIF include all unless filtered.
|
||||
#[arg(long)]
|
||||
min_confidence: Option<String>,
|
||||
|
||||
// ── Deprecated aliases (hidden) ─────────────────────────────────
|
||||
/// Deprecated: use --index off
|
||||
#[arg(long, hide = true)]
|
||||
no_index: bool,
|
||||
|
||||
/// Deprecated: use --index rebuild
|
||||
#[arg(long, hide = true)]
|
||||
rebuild_index: bool,
|
||||
|
||||
/// Deprecated: use --severity HIGH
|
||||
#[arg(long, hide = true)]
|
||||
high_only: bool,
|
||||
|
||||
/// Deprecated: use --mode ast
|
||||
#[arg(long, hide = true)]
|
||||
ast_only: bool,
|
||||
|
||||
/// Deprecated: use --mode cfg
|
||||
#[arg(long, hide = true)]
|
||||
cfg_only: bool,
|
||||
},
|
||||
|
||||
/// Manage project indexes
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ pub mod index;
|
|||
pub mod list;
|
||||
pub mod scan;
|
||||
|
||||
use crate::cli::Commands;
|
||||
use crate::cli::{Commands, IndexMode, ScanMode};
|
||||
use crate::errors::NyxResult;
|
||||
use crate::patterns::Severity;
|
||||
use crate::patterns::{Severity, SeverityFilter};
|
||||
use crate::utils::config::{AnalysisMode, Config};
|
||||
use std::path::Path;
|
||||
|
||||
|
|
@ -19,36 +19,130 @@ pub fn handle_command(
|
|||
match command {
|
||||
Commands::Scan {
|
||||
path,
|
||||
index,
|
||||
format,
|
||||
severity,
|
||||
mode,
|
||||
all_targets,
|
||||
keep_nonprod_severity,
|
||||
quiet,
|
||||
fail_on,
|
||||
no_rank,
|
||||
show_suppressed,
|
||||
show_all,
|
||||
include_quality,
|
||||
max_low,
|
||||
max_low_per_file,
|
||||
max_low_per_rule,
|
||||
rollup_examples,
|
||||
show_instances,
|
||||
min_score,
|
||||
min_confidence,
|
||||
// Deprecated aliases
|
||||
no_index,
|
||||
rebuild_index,
|
||||
format,
|
||||
high_only,
|
||||
ast_only,
|
||||
cfg_only,
|
||||
all_targets,
|
||||
include_nonprod,
|
||||
} => {
|
||||
if high_only {
|
||||
config.scanner.min_severity = Severity::High
|
||||
// ── Resolve deprecated aliases ──────────────────────────────
|
||||
|
||||
// Index mode: explicit --index wins, then deprecated flags
|
||||
let effective_index = if no_index {
|
||||
IndexMode::Off
|
||||
} else if rebuild_index {
|
||||
IndexMode::Rebuild
|
||||
} else {
|
||||
index
|
||||
};
|
||||
|
||||
if ast_only {
|
||||
config.scanner.mode = AnalysisMode::Ast
|
||||
// Analysis mode: explicit --mode wins, then deprecated flags
|
||||
let effective_mode = if ast_only {
|
||||
ScanMode::Ast
|
||||
} else if cfg_only {
|
||||
ScanMode::Cfg
|
||||
} else if all_targets {
|
||||
ScanMode::Full
|
||||
} else {
|
||||
mode
|
||||
};
|
||||
|
||||
if cfg_only {
|
||||
config.scanner.mode = AnalysisMode::Taint
|
||||
// Severity filter: explicit --severity wins, then --high-only
|
||||
let severity_filter = if let Some(ref expr) = severity {
|
||||
Some(SeverityFilter::parse(expr).map_err(|e| {
|
||||
crate::errors::NyxError::Msg(format!("invalid --severity expression: {e}"))
|
||||
})?)
|
||||
} else if high_only {
|
||||
Some(SeverityFilter::parse("HIGH").unwrap())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if all_targets {
|
||||
config.scanner.mode = AnalysisMode::Full
|
||||
// Fail-on threshold
|
||||
let fail_on_sev = if let Some(ref expr) = fail_on {
|
||||
Some(expr.trim().parse::<Severity>().map_err(|e| {
|
||||
crate::errors::NyxError::Msg(format!("invalid --fail-on value: {e}"))
|
||||
})?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if include_nonprod {
|
||||
config.scanner.include_nonprod = true
|
||||
};
|
||||
// ── Apply to config ─────────────────────────────────────────
|
||||
|
||||
scan::handle(&path, no_index, rebuild_index, format, database_dir, config)?;
|
||||
match effective_mode {
|
||||
ScanMode::Full => config.scanner.mode = AnalysisMode::Full,
|
||||
ScanMode::Ast => config.scanner.mode = AnalysisMode::Ast,
|
||||
ScanMode::Cfg | ScanMode::Taint => config.scanner.mode = AnalysisMode::Taint,
|
||||
}
|
||||
|
||||
if keep_nonprod_severity {
|
||||
config.scanner.include_nonprod = true;
|
||||
}
|
||||
|
||||
if quiet {
|
||||
config.output.quiet = true;
|
||||
}
|
||||
|
||||
if no_rank {
|
||||
config.output.attack_surface_ranking = false;
|
||||
}
|
||||
|
||||
// Min-score: CLI wins, then config
|
||||
if let Some(s) = min_score {
|
||||
config.output.min_score = Some(s);
|
||||
}
|
||||
|
||||
// Min-confidence: CLI wins, then config
|
||||
if let Some(ref expr) = min_confidence {
|
||||
config.output.min_confidence =
|
||||
Some(expr.parse::<crate::evidence::Confidence>().map_err(|e| {
|
||||
crate::errors::NyxError::Msg(format!("invalid --min-confidence value: {e}"))
|
||||
})?);
|
||||
}
|
||||
|
||||
if show_all {
|
||||
config.output.show_all = true;
|
||||
}
|
||||
if include_quality {
|
||||
config.output.include_quality = true;
|
||||
}
|
||||
// CLI values override config defaults (clap provides defaults)
|
||||
config.output.max_low = max_low;
|
||||
config.output.max_low_per_file = max_low_per_file;
|
||||
config.output.max_low_per_rule = max_low_per_rule;
|
||||
config.output.rollup_examples = rollup_examples;
|
||||
|
||||
scan::handle(
|
||||
&path,
|
||||
effective_index,
|
||||
format,
|
||||
severity_filter,
|
||||
fail_on_sev,
|
||||
show_suppressed,
|
||||
show_instances.as_deref(),
|
||||
database_dir,
|
||||
config,
|
||||
)?;
|
||||
}
|
||||
Commands::Index { action } => {
|
||||
index::handle(action, database_dir, config)?;
|
||||
|
|
|
|||
1116
src/commands/scan.rs
1116
src/commands/scan.rs
File diff suppressed because it is too large
Load diff
|
|
@ -272,6 +272,18 @@ pub mod index {
|
|||
line: row.get::<_, i64>(2)? as usize,
|
||||
col: row.get::<_, i64>(3)? as usize,
|
||||
severity: Severity::from_str(&sev_str).unwrap(),
|
||||
category: crate::patterns::FindingCategory::Security,
|
||||
path_validated: false,
|
||||
guard_kind: None,
|
||||
message: None,
|
||||
labels: vec![],
|
||||
confidence: None,
|
||||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
})
|
||||
})?;
|
||||
|
||||
|
|
|
|||
396
src/evidence.rs
Normal file
396
src/evidence.rs
Normal file
|
|
@ -0,0 +1,396 @@
|
|||
//! Structured evidence and confidence types for scan diagnostics.
|
||||
//!
|
||||
//! These types capture the provenance of findings (source locations,
|
||||
//! sanitizer/guard info, state-machine transitions) in a structured form
|
||||
//! that can be serialized to JSON and consumed by ranking, filtering,
|
||||
//! and downstream tooling.
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::patterns::Severity;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Confidence
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Confidence level for a diagnostic finding.
|
||||
///
|
||||
/// Ordered Low < Medium < High so that `>=` comparisons work naturally
|
||||
/// for filtering (e.g. `--min-confidence medium` keeps Medium and High).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub enum Confidence {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
impl fmt::Display for Confidence {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Low => write!(f, "Low"),
|
||||
Self::Medium => write!(f, "Medium"),
|
||||
Self::High => write!(f, "High"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Confidence {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_ascii_lowercase().as_str() {
|
||||
"low" => Ok(Self::Low),
|
||||
"medium" | "med" => Ok(Self::Medium),
|
||||
"high" => Ok(Self::High),
|
||||
_ => Err(format!(
|
||||
"unknown confidence level: {s:?} (expected low, medium, high)"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Evidence
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Structured evidence for a diagnostic finding.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Evidence {
|
||||
/// Where tainted data originated.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source: Option<SpanEvidence>,
|
||||
|
||||
/// Where the dangerous operation happens.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub sink: Option<SpanEvidence>,
|
||||
|
||||
/// Validation guards protecting this path.
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub guards: Vec<SpanEvidence>,
|
||||
|
||||
/// Sanitizers applied to this path.
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub sanitizers: Vec<SpanEvidence>,
|
||||
|
||||
/// State-machine evidence (resource lifecycle / auth).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub state: Option<StateEvidence>,
|
||||
|
||||
/// Free-form notes for ranking and display.
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub notes: Vec<String>,
|
||||
}
|
||||
|
||||
impl Evidence {
|
||||
/// Returns `true` if the evidence contains no useful data.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.source.is_none()
|
||||
&& self.sink.is_none()
|
||||
&& self.guards.is_empty()
|
||||
&& self.sanitizers.is_empty()
|
||||
&& self.state.is_none()
|
||||
&& self.notes.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// A source-location evidence span.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SpanEvidence {
|
||||
pub path: String,
|
||||
pub line: u32,
|
||||
pub col: u32,
|
||||
/// One of: `"source"`, `"sink"`, `"guard"`, `"sanitizer"`.
|
||||
pub kind: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub snippet: Option<String>,
|
||||
}
|
||||
|
||||
/// Evidence from a state-machine analysis (resource lifecycle / auth).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StateEvidence {
|
||||
/// The state machine: `"resource"` or `"auth"`.
|
||||
pub machine: String,
|
||||
/// Variable name if available.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub subject: Option<String>,
|
||||
/// State before the event.
|
||||
pub from_state: String,
|
||||
/// State after the event.
|
||||
pub to_state: String,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// compute_confidence
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Derive a confidence level for `diag` based on its rule ID, severity,
|
||||
/// evidence, and analysis kind.
|
||||
///
|
||||
/// This is called as a post-pass after all findings are collected; findings
|
||||
/// that already have a confidence set (e.g. from CFG analysis) are preserved.
|
||||
pub fn compute_confidence(diag: &Diag) -> Confidence {
|
||||
// Degraded analysis caps confidence
|
||||
if let Some(ev) = &diag.evidence
|
||||
&& ev.notes.iter().any(|n| n.starts_with("degraded:"))
|
||||
{
|
||||
return Confidence::Low;
|
||||
}
|
||||
|
||||
let id = &diag.id;
|
||||
|
||||
if id.starts_with("taint-") {
|
||||
if let Some(ev) = &diag.evidence
|
||||
&& ev.notes.iter().any(|n| n == "path_validated")
|
||||
{
|
||||
return Confidence::Medium;
|
||||
}
|
||||
// source+sink present = High
|
||||
if let Some(ev) = &diag.evidence
|
||||
&& ev.source.is_some()
|
||||
&& ev.sink.is_some()
|
||||
{
|
||||
return Confidence::High;
|
||||
}
|
||||
return Confidence::High; // default for taint
|
||||
}
|
||||
|
||||
if id.starts_with("state-") {
|
||||
return match id.as_str() {
|
||||
"state-use-after-close" => Confidence::High,
|
||||
"state-double-close" => Confidence::High,
|
||||
"state-unauthed-access" => Confidence::High,
|
||||
"state-resource-leak" => Confidence::Medium,
|
||||
"state-resource-leak-possible" => Confidence::Low,
|
||||
_ => Confidence::Medium,
|
||||
};
|
||||
}
|
||||
|
||||
if id.starts_with("cfg-") {
|
||||
// If CFG conversion already set confidence, preserve it
|
||||
return diag.confidence.unwrap_or(Confidence::Medium);
|
||||
}
|
||||
|
||||
// AST patterns: High severity → Medium confidence, else Low
|
||||
if diag.severity == Severity::High {
|
||||
Confidence::Medium
|
||||
} else {
|
||||
Confidence::Low
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal `Diag` at `test.rs:1:1` with the given rule id and severity.
    fn make_diag(id: &str, severity: Severity) -> Diag {
        Diag {
            path: "test.rs".into(),
            line: 1,
            col: 1,
            severity,
            id: id.into(),
            category: crate::patterns::FindingCategory::Security,
            path_validated: false,
            guard_kind: None,
            message: None,
            labels: vec![],
            confidence: None,
            evidence: None,
            rank_score: None,
            rank_reason: None,
            suppressed: false,
            suppression: None,
            rollup: None,
        }
    }

    /// Evidence with every field empty.
    fn empty_evidence() -> Evidence {
        Evidence {
            source: None,
            sink: None,
            guards: vec![],
            sanitizers: vec![],
            state: None,
            notes: vec![],
        }
    }

    /// A span in `test.rs` of the given kind and position.
    fn span(kind: &str, line: u32, col: u32, snippet: Option<&str>) -> SpanEvidence {
        SpanEvidence {
            path: "test.rs".into(),
            line,
            col,
            kind: kind.into(),
            snippet: snippet.map(Into::into),
        }
    }

    #[test]
    fn compute_confidence_taint_high() {
        let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
        d.evidence = Some(Evidence {
            source: Some(span("source", 1, 1, Some("env::var(\"X\")"))),
            sink: Some(span("sink", 10, 5, Some("exec()"))),
            ..empty_evidence()
        });
        assert_eq!(compute_confidence(&d), Confidence::High);
    }

    #[test]
    fn compute_confidence_taint_validated() {
        let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
        d.evidence = Some(Evidence {
            source: Some(span("source", 1, 1, None)),
            sink: Some(span("sink", 10, 5, None)),
            notes: vec!["path_validated".into()],
            ..empty_evidence()
        });
        assert_eq!(compute_confidence(&d), Confidence::Medium);
    }

    #[test]
    fn compute_confidence_degraded_caps_to_low() {
        let mut d = make_diag("taint-unsanitised-flow (source 1:1)", Severity::High);
        d.evidence = Some(Evidence {
            notes: vec!["degraded:budget_exceeded".into()],
            ..empty_evidence()
        });
        assert_eq!(compute_confidence(&d), Confidence::Low);
    }

    #[test]
    fn compute_confidence_state_rules() {
        assert_eq!(
            compute_confidence(&make_diag("state-use-after-close", Severity::High)),
            Confidence::High,
        );
        assert_eq!(
            compute_confidence(&make_diag("state-double-close", Severity::Medium)),
            Confidence::High,
        );
        assert_eq!(
            compute_confidence(&make_diag("state-unauthed-access", Severity::High)),
            Confidence::High,
        );
        assert_eq!(
            compute_confidence(&make_diag("state-resource-leak", Severity::Medium)),
            Confidence::Medium,
        );
        assert_eq!(
            compute_confidence(&make_diag("state-resource-leak-possible", Severity::Low)),
            Confidence::Low,
        );
    }

    #[test]
    fn compute_confidence_cfg_preserves_existing() {
        let mut d = make_diag("cfg-unguarded-sink", Severity::High);
        d.confidence = Some(Confidence::Low);
        assert_eq!(compute_confidence(&d), Confidence::Low);
    }

    #[test]
    fn compute_confidence_ast_low() {
        let d = make_diag("rs.code_exec.eval", Severity::Medium);
        assert_eq!(compute_confidence(&d), Confidence::Low);
    }

    #[test]
    fn compute_confidence_ast_high_severity_medium() {
        let d = make_diag("rs.code_exec.eval", Severity::High);
        assert_eq!(compute_confidence(&d), Confidence::Medium);
    }

    #[test]
    fn evidence_is_empty() {
        assert!(empty_evidence().is_empty());

        let ev2 = Evidence {
            source: Some(SpanEvidence {
                path: "x.rs".into(),
                ..span("source", 1, 1, None)
            }),
            ..empty_evidence()
        };
        assert!(!ev2.is_empty());
    }

    #[test]
    fn confidence_ord() {
        assert!(Confidence::Low < Confidence::Medium);
        assert!(Confidence::Medium < Confidence::High);
        assert!(Confidence::Low < Confidence::High);
    }

    #[test]
    fn confidence_display_and_parse() {
        assert_eq!(Confidence::Low.to_string(), "Low");
        assert_eq!(Confidence::Medium.to_string(), "Medium");
        assert_eq!(Confidence::High.to_string(), "High");

        assert_eq!("low".parse::<Confidence>().unwrap(), Confidence::Low);
        assert_eq!("MEDIUM".parse::<Confidence>().unwrap(), Confidence::Medium);
        assert_eq!("High".parse::<Confidence>().unwrap(), Confidence::High);
        assert!("invalid".parse::<Confidence>().is_err());
    }

    #[test]
    fn compute_confidence_does_not_override_preset() {
        // AST patterns set confidence directly; compute_confidence must not overwrite.
        let mut d = make_diag("rs.quality.expect", Severity::Low);
        d.confidence = Some(Confidence::High);
        // The post-pass only runs when confidence is None, but verify compute_confidence
        // itself would return something different (Low for AST + Low severity), proving
        // the guard in scan.rs is necessary.
        assert_eq!(compute_confidence(&d), Confidence::Low);
        // The actual guard: confidence is already Some, so scan.rs skips compute_confidence.
        assert_eq!(d.confidence, Some(Confidence::High));
    }

    #[test]
    fn json_omits_none_fields() {
        let json = serde_json::to_string(&empty_evidence()).unwrap();
        assert_eq!(json, "{}");
    }
}
|
||||
984
src/fmt.rs
Normal file
984
src/fmt.rs
Normal file
|
|
@ -0,0 +1,984 @@
|
|||
//! Console output formatting for scan diagnostics.
|
||||
//!
|
||||
//! Produces professional, security-tool-grade aligned output with a clear
|
||||
//! severity hierarchy, normalised taint flow rendering, and stable wrapping.
|
||||
|
||||
use crate::commands::scan::{Diag, SuppressionStats};
|
||||
use crate::patterns::Severity;
|
||||
use console::style;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
/// Default maximum line width when terminal size is unknown.
|
||||
const DEFAULT_WIDTH: usize = 100;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Public API
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Render all diagnostics as grouped, formatted console output with a summary.
|
||||
pub fn render_console(
|
||||
diags: &[Diag],
|
||||
project_name: &str,
|
||||
suppression_stats: Option<&SuppressionStats>,
|
||||
) -> String {
|
||||
let width = terminal_width();
|
||||
let mut out = String::new();
|
||||
|
||||
let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
|
||||
for d in diags {
|
||||
grouped.entry(&d.path).or_default().push(d);
|
||||
}
|
||||
|
||||
for (path, issues) in &grouped {
|
||||
// File path header — dim blue, never brighter than severity.
|
||||
out.push_str(&format!("{}\n", style(path).blue().dim().underlined()));
|
||||
for d in issues {
|
||||
out.push_str(&render_diag(d, width));
|
||||
out.push('\n'); // blank line between findings
|
||||
}
|
||||
}
|
||||
|
||||
let suppressed_count = diags.iter().filter(|d| d.suppressed).count();
|
||||
let active_count = diags.len() - suppressed_count;
|
||||
|
||||
if suppressed_count > 0 {
|
||||
out.push_str(&format!(
|
||||
"{} '{}' generated {} {} ({} suppressed).\n\n",
|
||||
style("warning").yellow().bold(),
|
||||
style(project_name).white().bold(),
|
||||
style(active_count).bold(),
|
||||
if active_count == 1 { "issue" } else { "issues" },
|
||||
suppressed_count,
|
||||
));
|
||||
} else {
|
||||
out.push_str(&format!(
|
||||
"{} '{}' generated {} {}.\n\n",
|
||||
style("warning").yellow().bold(),
|
||||
style(project_name).white().bold(),
|
||||
style(diags.len()).bold(),
|
||||
if diags.len() == 1 { "issue" } else { "issues" },
|
||||
));
|
||||
}
|
||||
|
||||
// ── Suppression footer ─────────────────────────────────────────────
|
||||
if let Some(stats) = suppression_stats {
|
||||
let total = stats.total_suppressed();
|
||||
if total > 0 {
|
||||
out.push_str(&format!(
|
||||
"{}\n",
|
||||
style(format!("Suppressed {total} LOW/Quality findings.")).dim()
|
||||
));
|
||||
out.push_str(&format!("{}\n", style("Active filters:").dim()));
|
||||
if !stats.include_quality {
|
||||
out.push_str(&format!(
|
||||
" {} {}\n",
|
||||
style("include_quality =").dim(),
|
||||
style("false").dim()
|
||||
));
|
||||
}
|
||||
out.push_str(&format!(
|
||||
" {} {}\n",
|
||||
style("max_low =").dim(),
|
||||
style(stats.max_low).dim()
|
||||
));
|
||||
out.push_str(&format!(
|
||||
" {} {}\n",
|
||||
style("max_low_per_file =").dim(),
|
||||
style(stats.max_low_per_file).dim()
|
||||
));
|
||||
out.push_str(&format!(
|
||||
" {} {}\n",
|
||||
style("max_low_per_rule =").dim(),
|
||||
style(stats.max_low_per_rule).dim()
|
||||
));
|
||||
out.push_str(&format!(
|
||||
"\n{}\n",
|
||||
style("Use --include-quality, --max-low, or --all to adjust.").dim()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Normalise a code snippet for display: collapse whitespace, join lines,
|
||||
/// clean up method-chain spacing, trim, and truncate.
|
||||
pub fn normalize_snippet(s: &str) -> String {
|
||||
// Strip newlines/carriage returns with no replacement, then collapse
|
||||
// runs of spaces into a single space.
|
||||
let no_newlines: String = s.chars().filter(|c| *c != '\n' && *c != '\r').collect();
|
||||
let collapsed: String = no_newlines.split_whitespace().collect::<Vec<_>>().join(" ");
|
||||
// Clean up `) .foo(` → `).foo(` and similar spacing around dots in chains.
|
||||
let cleaned = collapse_chain_spacing(&collapsed);
|
||||
let trimmed = cleaned.trim();
|
||||
if trimmed.len() > 120 {
|
||||
format!("{}…", &trimmed[..120])
|
||||
} else {
|
||||
trimmed.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Truncate method chains: keep constructor + first balanced `(...)`, then `…`.
///
/// E.g. `Command::new("sh").arg("-c").arg(&cmd)` → `Command::new("sh")…`
///
/// The input is trimmed first. It is returned unchanged (apart from the trim)
/// when it contains no `(`, when the first `(` is never closed, or when
/// nothing follows the first balanced group. Parentheses inside string
/// literals are counted like any other parenthesis.
#[allow(dead_code)] // public API, used by consumers
pub fn shorten_callee(s: &str) -> String {
    let s = s.trim();

    let Some(open) = s.find('(') else {
        return s.to_string();
    };

    // The scan starts on `(`, so `depth` is at least 1 before any `)` is
    // seen and the decrement below cannot underflow.
    let mut depth = 0u32;
    let close = s[open..].char_indices().find_map(|(offset, ch)| {
        match ch {
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(open + offset);
                }
            }
            _ => {}
        }
        None
    });

    match close {
        // Something follows the balanced group: cut right after its `)`.
        Some(close_idx) if close_idx + 1 < s.len() => format!("{}…", &s[..=close_idx]),
        _ => s.to_string(),
    }
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Internal rendering
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Indentation for body/evidence lines (spaces).
|
||||
const BODY_INDENT: usize = 6;
|
||||
|
||||
/// Render a single diagnostic block.
|
||||
fn render_diag(d: &Diag, width: usize) -> String {
|
||||
let mut out = String::new();
|
||||
|
||||
// ── Header line ──────────────────────────────────────────────────────
|
||||
// Format: ` 98:5 ⚠ [MEDIUM] taint-unsanitised-flow (Score: 87, Confidence: Medium)`
|
||||
let loc = format!("{}:{}", d.line, d.col);
|
||||
let sev = if d.suppressed {
|
||||
format!("{} {}", style("○").dim(), style("[SUPPRESSED]").dim(),)
|
||||
} else {
|
||||
severity_tag(d.severity)
|
||||
};
|
||||
let meta_suffix = match (d.rank_score, d.confidence) {
|
||||
(Some(s), Some(c)) => format!(
|
||||
" {}",
|
||||
style(format!("(Score: {}, Confidence: {c})", s as u32)).dim()
|
||||
),
|
||||
(Some(s), None) => format!(" {}", style(format!("(Score: {})", s as u32)).dim()),
|
||||
(None, Some(c)) => format!(" {}", style(format!("(Confidence: {c})")).dim()),
|
||||
(None, None) => String::new(),
|
||||
};
|
||||
out.push_str(&format!(
|
||||
" {} {} {}{}\n",
|
||||
style(&loc).dim(),
|
||||
sev,
|
||||
style(&d.id).dim(),
|
||||
meta_suffix,
|
||||
));
|
||||
|
||||
// ── Rollup body ─────────────────────────────────────────────────────
|
||||
let indent_str = " ".repeat(BODY_INDENT);
|
||||
if let Some(ref rollup) = d.rollup {
|
||||
out.push_str(&format!(
|
||||
"{indent_str}{} ({} occurrences)\n",
|
||||
style(&d.id).dim(),
|
||||
rollup.count
|
||||
));
|
||||
if !rollup.occurrences.is_empty() {
|
||||
let examples: Vec<String> = rollup
|
||||
.occurrences
|
||||
.iter()
|
||||
.map(|loc| format!("{}:{}", loc.line, loc.col))
|
||||
.collect();
|
||||
out.push_str(&format!(
|
||||
"{indent_str}{} {}\n",
|
||||
style("Examples:").dim(),
|
||||
style(examples.join(", ")).dim()
|
||||
));
|
||||
}
|
||||
out.push_str(&format!(
|
||||
"{indent_str}{}\n",
|
||||
style(format!("Run: nyx scan --show-instances {}", d.id)).dim()
|
||||
));
|
||||
return out;
|
||||
}
|
||||
|
||||
// ── Message body ─────────────────────────────────────────────────────
|
||||
if let Some(msg) = &d.message {
|
||||
let capitalized = capitalize_first(msg);
|
||||
let wrapped = wrap_text(&capitalized, width, BODY_INDENT);
|
||||
out.push_str(&format!("{indent_str}{wrapped}\n"));
|
||||
}
|
||||
|
||||
// ── Evidence labels (Source, Sink, Path guard) ───────────────────────
|
||||
if !d.labels.is_empty() {
|
||||
out.push('\n');
|
||||
let max_label = d.labels.iter().map(|(k, _)| k.len()).max().unwrap_or(0);
|
||||
let key_width = max_label + 1; // +1 for ':'
|
||||
for (label, value) in &d.labels {
|
||||
let key_str = format!("{label}:");
|
||||
let value_indent = BODY_INDENT + key_width + 1; // key + space
|
||||
let wrapped_val = wrap_text(value, width, value_indent);
|
||||
if label == "Path guard" {
|
||||
out.push_str(&format!(
|
||||
"{indent_str}{:<kw$} {}\n",
|
||||
style(&key_str).dim(),
|
||||
style(&wrapped_val).cyan(),
|
||||
kw = key_width,
|
||||
));
|
||||
} else {
|
||||
out.push_str(&format!(
|
||||
"{indent_str}{:<kw$} {}\n",
|
||||
style(&key_str).dim(),
|
||||
wrapped_val,
|
||||
kw = key_width,
|
||||
));
|
||||
}
|
||||
}
|
||||
} else if let Some(guard) = &d.guard_kind {
|
||||
out.push_str(&format!(
|
||||
"{indent_str}{} {}\n",
|
||||
style("Path guard:").dim(),
|
||||
style(guard).cyan(),
|
||||
));
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Colored severity tag with icon. The tag is the visual anchor of each finding.
|
||||
///
|
||||
/// - HIGH: bold red
|
||||
/// - MEDIUM: bold 208 (orange) — distinct from yellow
|
||||
/// - LOW: dim 67 (muted blue-gray)
|
||||
fn severity_tag(sev: Severity) -> String {
|
||||
match sev {
|
||||
Severity::High => format!(
|
||||
"{} [{}]",
|
||||
style("✖").red().bold(),
|
||||
style("HIGH").red().bold(),
|
||||
),
|
||||
Severity::Medium => format!(
|
||||
"{} [{}]",
|
||||
style("⚠").color256(208).bold(),
|
||||
style("MEDIUM").color256(208).bold(),
|
||||
),
|
||||
Severity::Low => format!(
|
||||
"{} [{}]",
|
||||
style("●").color256(67),
|
||||
style("LOW").color256(67),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Text utilities
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Collapse spacing artefacts in method chains.
///
/// Any run of ASCII spaces that sits between a closing `)` and a following
/// `.` is removed, so `") .foo("` becomes `").foo("`.
///
/// All other spacing is left untouched: spaces after a `)` that is *not*
/// followed by `.` are preserved verbatim, and this function does not collapse
/// repeated spaces elsewhere in the string.
fn collapse_chain_spacing(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(pos) = rest.find(')') {
        // `)` is a single byte, so `pos + 1` is always a char boundary.
        let (head, tail) = rest.split_at(pos + 1);
        out.push_str(head);

        let after_spaces = tail.trim_start_matches(' ');
        if !after_spaces.starts_with('.') {
            // Not a chain continuation — keep the spaces we looked past.
            out.push_str(&tail[..tail.len() - after_spaces.len()]);
        }
        rest = after_spaces;
    }

    out.push_str(rest);
    out
}
|
||||
|
||||
/// Word-wrap `text` to fit within `max_width`, with continuation lines
/// indented by `indent` spaces.
///
/// The first line is NOT indented (the caller emits that prefix), but every
/// line — including the first — is budgeted `max_width - indent` characters.
///
/// Widths are measured in Unicode scalar values (`char`s), not bytes, so
/// non-ASCII text such as `…` or accented letters is not wrapped early. This
/// is still an approximation of terminal display width: wide and combining
/// characters are each counted as one column.
///
/// If the text already fits, or there is no room at all
/// (`max_width <= indent`), it is returned verbatim. Otherwise runs of
/// whitespace are normalised to single spaces / line breaks. A single word
/// longer than the budget is never split; it is placed on its own line.
fn wrap_text(text: &str, max_width: usize, indent: usize) -> String {
    let available = max_width.saturating_sub(indent);
    if available == 0 || text.chars().count() <= available {
        return text.to_string();
    }

    let indent_str = " ".repeat(indent);
    let mut result = String::with_capacity(text.len());
    // Characters already placed on the current output line (excluding indent).
    let mut line_len = 0usize;

    for word in text.split_whitespace() {
        let wlen = word.chars().count();

        if line_len == 0 {
            // Very first word: nothing to separate it from.
            result.push_str(word);
            line_len = wlen;
        } else if line_len + 1 + wlen > available {
            // Word (plus its separating space) would overflow: start a new line.
            result.push('\n');
            result.push_str(&indent_str);
            result.push_str(word);
            line_len = wlen;
        } else {
            result.push(' ');
            result.push_str(word);
            line_len += 1 + wlen;
        }
    }

    result
}
|
||||
|
||||
/// Get terminal width, falling back to DEFAULT_WIDTH.
|
||||
fn terminal_width() -> usize {
|
||||
terminal_size::terminal_size()
|
||||
.map(|(w, _)| w.0 as usize)
|
||||
.unwrap_or(DEFAULT_WIDTH)
|
||||
}
|
||||
|
||||
/// Return `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input yields an empty string. The upper-case mapping may expand
/// to more than one character (for example `ß` becomes `SS`).
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        let mut capitalized = String::with_capacity(s.len());
        capitalized.extend(first.to_uppercase());
        // `rest` has consumed exactly one char; append the untouched tail.
        capitalized.push_str(rest.as_str());
        capitalized
    } else {
        String::new()
    }
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ── Helpers ──────────────────────────────────────────────────────────

    /// Strip ANSI escape codes for testing visible content.
    ///
    /// Everything from an ESC byte up to and including the next `m` is
    /// dropped, which covers the SGR colour/style sequences the renderer emits.
    fn strip_ansi(s: &str) -> String {
        let mut result = String::new();
        let mut in_escape = false;
        for ch in s.chars() {
            if ch == '\x1b' {
                in_escape = true;
            } else if in_escape {
                if ch == 'm' {
                    in_escape = false;
                }
            } else {
                result.push(ch);
            }
        }
        result
    }

    /// Minimal low-severity finding with every optional field empty.
    ///
    /// Tests override only the fields they exercise via struct-update syntax
    /// (`Diag { line: 10, ..base_diag() }`), so adding a field to `Diag`
    /// requires touching this fixture only.
    fn base_diag() -> Diag {
        Diag {
            path: "x.rs".into(),
            line: 1,
            col: 1,
            severity: Severity::Low,
            id: "test".into(),
            category: crate::patterns::FindingCategory::Security,
            path_validated: false,
            guard_kind: None,
            message: None,
            labels: vec![],
            confidence: None,
            evidence: None,
            rank_score: None,
            rank_reason: None,
            suppressed: false,
            suppression: None,
            rollup: None,
        }
    }

    // ── normalize_snippet ────────────────────────────────────────────────

    #[test]
    fn normalize_snippet_strips_newlines_no_space() {
        // Newlines are removed with no whitespace inserted in their place.
        assert_eq!(normalize_snippet("foo\nbar\rbaz"), "foobarbaz");
    }

    #[test]
    fn normalize_snippet_collapses_whitespace() {
        assert_eq!(
            normalize_snippet("Command::new(\"tar\") .arg(\"-czf\")"),
            "Command::new(\"tar\").arg(\"-czf\")"
        );
    }

    #[test]
    fn normalize_snippet_trims() {
        assert_eq!(normalize_snippet(" hello "), "hello");
    }

    #[test]
    fn normalize_snippet_truncates_at_120() {
        let long = "a".repeat(200);
        let result = normalize_snippet(&long);
        // 120 chars + '…' (3 bytes UTF-8)
        assert!(result.len() > 120);
        assert!(result.ends_with('…'));
    }

    #[test]
    fn normalize_snippet_short_unchanged() {
        assert_eq!(normalize_snippet("short"), "short");
    }

    // ── collapse_chain_spacing ───────────────────────────────────────────

    #[test]
    fn collapse_chain_removes_space_before_dot() {
        assert_eq!(
            collapse_chain_spacing("foo() .bar() .baz()"),
            "foo().bar().baz()"
        );
    }

    #[test]
    fn collapse_chain_preserves_non_chain_spacing() {
        assert_eq!(collapse_chain_spacing("foo() + bar()"), "foo() + bar()");
    }

    #[test]
    fn collapse_chain_multiple_spaces() {
        // A run of several spaces between `)` and `.` is removed entirely.
        assert_eq!(
            collapse_chain_spacing("cmd()   .arg(\"-c\")"),
            "cmd().arg(\"-c\")"
        );
    }

    // ── shorten_callee ───────────────────────────────────────────────────

    #[test]
    fn shorten_callee_truncates_chain() {
        assert_eq!(
            shorten_callee("Command::new(\"sh\").arg(\"-c\").arg(&cmd)"),
            "Command::new(\"sh\")…"
        );
    }

    #[test]
    fn shorten_callee_no_chain_unchanged() {
        assert_eq!(shorten_callee("env::var(\"HOME\")"), "env::var(\"HOME\")");
    }

    #[test]
    fn shorten_callee_nested_parens() {
        assert_eq!(shorten_callee("foo(bar(1, 2)).baz()"), "foo(bar(1, 2))…");
    }

    #[test]
    fn shorten_callee_no_parens() {
        assert_eq!(shorten_callee("simple_name"), "simple_name");
    }

    #[test]
    fn shorten_callee_empty() {
        assert_eq!(shorten_callee(""), "");
    }

    // ── wrap_text ────────────────────────────────────────────────────────

    #[test]
    fn wrap_short_text_unchanged() {
        assert_eq!(wrap_text("short text", 80, 4), "short text");
    }

    #[test]
    fn wrap_breaks_at_boundary() {
        let text = "word1 word2 word3 word4 word5";
        let result = wrap_text(text, 20, 4);
        assert!(result.contains('\n'));
        // Continuation lines carry the full 4-space indent requested above.
        for line in result.lines().skip(1) {
            assert!(line.starts_with("    "), "continuation not indented: {line:?}");
        }
    }

    // ── severity_tag ─────────────────────────────────────────────────────

    #[test]
    fn severity_tags_contain_level_name() {
        let h = strip_ansi(&severity_tag(Severity::High));
        let m = strip_ansi(&severity_tag(Severity::Medium));
        let l = strip_ansi(&severity_tag(Severity::Low));
        assert!(h.contains("HIGH"), "got: {h}");
        assert!(m.contains("MEDIUM"), "got: {m}");
        assert!(l.contains("LOW"), "got: {l}");
    }

    #[test]
    fn severity_tags_have_icons() {
        let h = strip_ansi(&severity_tag(Severity::High));
        let m = strip_ansi(&severity_tag(Severity::Medium));
        let l = strip_ansi(&severity_tag(Severity::Low));
        assert!(h.contains('✖'), "HIGH should have ✖");
        assert!(m.contains('⚠'), "MEDIUM should have ⚠");
        assert!(l.contains('●'), "LOW should have ●");
    }

    // ── render_console ───────────────────────────────────────────────────

    #[test]
    fn render_console_groups_by_file() {
        let diags = vec![
            Diag {
                path: "src/a.rs".into(),
                line: 10,
                col: 5,
                severity: Severity::High,
                id: "test-rule".into(),
                message: Some("test message".into()),
                ..base_diag()
            },
            Diag {
                path: "src/b.rs".into(),
                line: 20,
                col: 1,
                severity: Severity::Low,
                id: "another-rule".into(),
                ..base_diag()
            },
        ];
        let output = render_console(&diags, "test-project", None);
        let stripped = strip_ansi(&output);
        assert!(stripped.contains("src/a.rs"));
        assert!(stripped.contains("src/b.rs"));
        assert!(stripped.contains("2 issues"));
        assert!(stripped.contains("test-project"));
    }

    #[test]
    fn render_console_evidence_displayed() {
        let diags = vec![Diag {
            path: "src/main.rs".into(),
            line: 42,
            col: 5,
            severity: Severity::High,
            id: "taint-unsanitised-flow (source 12:3)".into(),
            message: Some("unsanitised input".into()),
            labels: vec![
                ("Source".into(), "env::var(\"HOME\") at 12:3".into()),
                ("Sink".into(), "Command::new(\"sh\")".into()),
            ],
            ..base_diag()
        }];
        let output = render_console(&diags, "proj", None);
        let stripped = strip_ansi(&output);
        assert!(stripped.contains("Source:"), "should contain Source label");
        assert!(stripped.contains("Sink:"), "should contain Sink label");
        // No backticks in output
        assert!(
            !stripped.contains('`'),
            "should not contain backticks in evidence"
        );
    }

    #[test]
    fn render_console_blank_line_between_findings() {
        let diags = vec![
            Diag {
                path: "src/a.rs".into(),
                line: 1,
                col: 1,
                severity: Severity::High,
                id: "rule-a".into(),
                message: Some("first".into()),
                ..base_diag()
            },
            Diag {
                path: "src/a.rs".into(),
                line: 10,
                col: 1,
                severity: Severity::Medium,
                id: "rule-b".into(),
                message: Some("second".into()),
                ..base_diag()
            },
        ];
        let output = render_console(&diags, "proj", None);
        let stripped = strip_ansi(&output);
        // There should be a blank line between the two findings
        assert!(
            stripped.contains("First\n\n"),
            "blank line between findings: {stripped}"
        );
    }

    // ── JSON serialisation ───────────────────────────────────────────────

    #[test]
    fn json_omits_empty_labels() {
        let d = base_diag();
        let json = serde_json::to_string(&d).unwrap();
        assert!(
            !json.contains("labels"),
            "empty labels should be omitted from JSON"
        );
    }

    #[test]
    fn json_omits_rank_fields_when_none() {
        let d = base_diag();
        let json = serde_json::to_string(&d).unwrap();
        assert!(
            !json.contains("rank_score"),
            "rank_score should be omitted when None"
        );
        assert!(
            !json.contains("rank_reason"),
            "rank_reason should be omitted when None"
        );
    }

    #[test]
    fn json_includes_rank_score_when_set() {
        let d = Diag {
            severity: Severity::High,
            id: "taint-unsanitised-flow".into(),
            rank_score: Some(120.0),
            ..base_diag()
        };
        let json = serde_json::to_string(&d).unwrap();
        assert!(
            json.contains("rank_score"),
            "rank_score should be present when set"
        );
        assert!(json.contains("120"), "rank_score value should appear");
    }

    // ── capitalize_first ─────────────────────────────────────────────────

    #[test]
    fn capitalize_first_works() {
        assert_eq!(capitalize_first("hello"), "Hello");
        assert_eq!(capitalize_first(""), "");
        assert_eq!(capitalize_first("A"), "A");
        assert_eq!(capitalize_first("unsanitised"), "Unsanitised");
    }

    // ── taint flow rendering (integration-style) ─────────────────────────

    #[test]
    fn taint_flow_no_broken_backticks_or_weird_spacing() {
        let raw_sink = "Command::new(\"tar\") .arg(\"-czf\") .arg(\"/backups/nightly.tar.gz\") .arg(\"/var/data\") .output()";
        let normalised = normalize_snippet(raw_sink);
        // Chain spacing should be collapsed
        assert!(
            !normalised.contains(") ."),
            "chain spacing should be collapsed: {normalised}"
        );
        // Two consecutive spaces, matching the "double-spaces" message.
        assert!(!normalised.contains("  "), "no double-spaces: {normalised}");
        // Should not contain backticks
        assert!(!normalised.contains('`'), "no backticks: {normalised}");
    }

    #[test]
    fn multiline_sink_joined_and_normalised() {
        let raw = "Command::new(\"tar\")\n .arg(\"-czf\")\n .arg(\"/backups/nightly.tar.gz\")\n .arg(\"/var/data\")\n .output()";
        let normalised = normalize_snippet(raw);
        assert_eq!(
            normalised,
            "Command::new(\"tar\").arg(\"-czf\").arg(\"/backups/nightly.tar.gz\").arg(\"/var/data\").output()"
        );
    }

    // ── confidence display ──────────────────────────────────────────────

    #[test]
    fn confidence_after_score_on_header_line() {
        let d = Diag {
            path: "src/a.rs".into(),
            line: 510,
            col: 5,
            severity: Severity::Medium,
            id: "cfg-unguarded-sink".into(),
            message: Some("dangerous sink".into()),
            confidence: Some(crate::evidence::Confidence::Medium),
            rank_score: Some(36.0),
            ..base_diag()
        };
        let output = render_diag(&d, 120);
        let stripped = strip_ansi(&output);
        // Header line should contain score and confidence together
        let header = stripped.lines().next().unwrap();
        assert!(
            header.contains("(Score: 36, Confidence: Medium)"),
            "header should contain '(Score: 36, Confidence: Medium)': {header}"
        );
        // No standalone Confidence line
        assert!(
            !stripped
                .lines()
                .skip(1)
                .any(|l| l.trim().starts_with("Confidence:")),
            "should not have standalone Confidence line"
        );
    }

    #[test]
    fn confidence_title_case() {
        for (conf, expected) in [
            (crate::evidence::Confidence::Low, "Confidence: Low"),
            (crate::evidence::Confidence::Medium, "Confidence: Medium"),
            (crate::evidence::Confidence::High, "Confidence: High"),
        ] {
            let d = Diag {
                confidence: Some(conf),
                ..base_diag()
            };
            let output = render_diag(&d, 100);
            let stripped = strip_ansi(&output);
            assert!(
                stripped.contains(expected),
                "expected '{expected}' in: {stripped}"
            );
        }
    }

    #[test]
    fn confidence_none_only_score() {
        let d = Diag {
            path: "src/a.rs".into(),
            line: 10,
            col: 5,
            severity: Severity::High,
            id: "test-rule".into(),
            message: Some("test message".into()),
            rank_score: Some(42.0),
            ..base_diag()
        };
        let output = render_diag(&d, 100);
        let stripped = strip_ansi(&output);
        let header = stripped.lines().next().unwrap();
        assert!(
            header.contains("(Score: 42)"),
            "should show score without confidence: {header}"
        );
        assert!(
            !header.contains("Confidence"),
            "should not mention confidence when None: {header}"
        );
    }

    #[test]
    fn confidence_only_no_score() {
        let d = Diag {
            path: "src/a.rs".into(),
            line: 10,
            col: 5,
            severity: Severity::High,
            id: "test-rule".into(),
            confidence: Some(crate::evidence::Confidence::High),
            ..base_diag()
        };
        let output = render_diag(&d, 100);
        let stripped = strip_ansi(&output);
        let header = stripped.lines().next().unwrap();
        assert!(
            header.contains("(Confidence: High)"),
            "should show confidence without score: {header}"
        );
    }

    #[test]
    fn json_omits_confidence_when_none() {
        let d = base_diag();
        let json = serde_json::to_string(&d).unwrap();
        assert!(
            !json.contains("confidence"),
            "confidence should be omitted when None: {json}"
        );
    }
}
|
||||
|
|
@ -31,6 +31,10 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["fopen", "open"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
@ -39,6 +43,9 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"while_statement" => Kind::While,
|
||||
"for_statement" => Kind::For,
|
||||
"do_statement" => Kind::While,
|
||||
"switch_statement" => Kind::Block,
|
||||
"case_statement" => Kind::Block,
|
||||
"labeled_statement" => Kind::Block,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
|
|
@ -47,6 +54,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
// structure
|
||||
"translation_unit" => Kind::SourceFile,
|
||||
"compound_statement" => Kind::Block,
|
||||
"else_clause" => Kind::Block,
|
||||
"function_definition" => Kind::Function,
|
||||
|
||||
// data-flow
|
||||
|
|
|
|||
|
|
@ -29,6 +29,10 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["fopen", "open"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
@ -38,15 +42,23 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"for_statement" => Kind::For,
|
||||
"for_range_loop" => Kind::For,
|
||||
"do_statement" => Kind::While,
|
||||
"switch_statement" => Kind::Block,
|
||||
"case_statement" => Kind::Block,
|
||||
"labeled_statement" => Kind::Block,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"throw_statement" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_statement" => Kind::Continue,
|
||||
|
||||
// structure
|
||||
"translation_unit" => Kind::SourceFile,
|
||||
"compound_statement" => Kind::Block,
|
||||
"else_clause" => Kind::Block,
|
||||
"function_definition" => Kind::Function,
|
||||
"try_statement" => Kind::Block,
|
||||
"catch_clause" => Kind::Block,
|
||||
"lambda_expression" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"call_expression" => Kind::CallFn,
|
||||
|
|
@ -63,7 +75,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"preproc_include" => Kind::Trivia,
|
||||
"preproc_def" => Kind::Trivia,
|
||||
"using_declaration" => Kind::Trivia,
|
||||
"namespace_definition" => Kind::Trivia,
|
||||
"namespace_definition" => Kind::Block,
|
||||
};
|
||||
|
||||
pub static PARAM_CONFIG: ParamConfig = ParamConfig {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,17 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["http.Request", "r.FormValue", "r.URL"],
|
||||
matchers: &[
|
||||
"http.Request",
|
||||
"r.FormValue",
|
||||
"r.URL",
|
||||
"r.Body",
|
||||
"r.Header",
|
||||
"r.URL.Query",
|
||||
"r.URL.Query.Get",
|
||||
"Request.FormValue",
|
||||
"Request.URL",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
|
|
@ -17,18 +27,40 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["url.QueryEscape"],
|
||||
matchers: &["url.QueryEscape", "url.PathEscape"],
|
||||
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["filepath.Clean", "filepath.Base"],
|
||||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
},
|
||||
// ─────────── Sinks ─────────────
|
||||
LabelRule {
|
||||
matchers: &["exec.Command"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["db.Query", "db.Exec"],
|
||||
matchers: &["db.Query", "db.Exec", "db.QueryRow", "db.Prepare"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["fmt.Fprintf", "fmt.Sprintf", "fmt.Printf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"os.Open",
|
||||
"os.OpenFile",
|
||||
"os.Create",
|
||||
"ioutil.ReadFile",
|
||||
"os.ReadFile",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["template.HTML"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
@ -46,6 +78,16 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"statement_list" => Kind::Block,
|
||||
"function_declaration" => Kind::Function,
|
||||
"method_declaration" => Kind::Function,
|
||||
"func_literal" => Kind::Function,
|
||||
"expression_switch_statement" => Kind::Block,
|
||||
"type_switch_statement" => Kind::Block,
|
||||
"expression_case" => Kind::Block,
|
||||
"type_case" => Kind::Block,
|
||||
"default_case" => Kind::Block,
|
||||
"select_statement" => Kind::Block,
|
||||
"communication_case" => Kind::Block,
|
||||
"go_statement" => Kind::Block,
|
||||
"defer_statement" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"call_expression" => Kind::CallFn,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,19 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["getParameter", "getInputStream", "getHeader", "getCookies"],
|
||||
matchers: &[
|
||||
"getParameter",
|
||||
"getInputStream",
|
||||
"getHeader",
|
||||
"getCookies",
|
||||
"getReader",
|
||||
"getQueryString",
|
||||
"getPathInfo",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["readObject", "readLine"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
|
|
@ -18,13 +30,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
// ─────────── Sinks ─────────────
|
||||
LabelRule {
|
||||
matchers: &["Runtime.exec"],
|
||||
matchers: &["Runtime.exec", "ProcessBuilder"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["executeQuery", "executeUpdate", "prepareStatement"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["Class.forName"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["println", "print", "write"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
@ -33,8 +53,10 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"while_statement" => Kind::While,
|
||||
"for_statement" => Kind::For,
|
||||
"enhanced_for_statement" => Kind::For,
|
||||
"do_statement" => Kind::While,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"throw_statement" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_statement" => Kind::Continue,
|
||||
|
||||
|
|
@ -46,6 +68,15 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"interface_body" => Kind::Block,
|
||||
"method_declaration" => Kind::Function,
|
||||
"constructor_declaration" => Kind::Function,
|
||||
"switch_expression" => Kind::Block,
|
||||
"switch_block" => Kind::Block,
|
||||
"switch_block_statement_group" => Kind::Block,
|
||||
"try_statement" => Kind::Block,
|
||||
"catch_clause" => Kind::Block,
|
||||
"finally_clause" => Kind::Block,
|
||||
"lambda_expression" => Kind::Block,
|
||||
"constructor_body" => Kind::Block,
|
||||
"static_initializer" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"method_invocation" => Kind::CallMethod,
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"while_statement" => Kind::While,
|
||||
"for_statement" => Kind::For,
|
||||
"for_in_statement" => Kind::For,
|
||||
"do_statement" => Kind::While,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"throw_statement" => Kind::Return,
|
||||
|
|
@ -71,9 +72,24 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
// structure
|
||||
"program" => Kind::SourceFile,
|
||||
"statement_block" => Kind::Block,
|
||||
"else_clause" => Kind::Block,
|
||||
"function_declaration" => Kind::Function,
|
||||
"function_expression" => Kind::Function,
|
||||
"arrow_function" => Kind::Function,
|
||||
"method_definition" => Kind::Function,
|
||||
"generator_function_declaration" => Kind::Function,
|
||||
"generator_function" => Kind::Function,
|
||||
"switch_statement" => Kind::Block,
|
||||
"switch_body" => Kind::Block,
|
||||
"switch_case" => Kind::Block,
|
||||
"switch_default" => Kind::Block,
|
||||
"try_statement" => Kind::Block,
|
||||
"catch_clause" => Kind::Block,
|
||||
"finally_clause" => Kind::Block,
|
||||
"class_declaration" => Kind::Block,
|
||||
"class" => Kind::Block,
|
||||
"class_body" => Kind::Block,
|
||||
"export_statement" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"call_expression" => Kind::CallFn,
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ pub enum Kind {
|
|||
InfiniteLoop,
|
||||
While,
|
||||
For,
|
||||
LoopBody,
|
||||
CallFn,
|
||||
CallMethod,
|
||||
CallMacro,
|
||||
|
|
@ -196,7 +195,7 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
|
|||
}
|
||||
|
||||
/// The kind of taint source, used to refine finding severity.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum SourceKind {
|
||||
/// Direct user input (request params, argv, stdin, form data)
|
||||
UserInput,
|
||||
|
|
@ -375,6 +374,11 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
let head = text.split(['(', '<']).next().unwrap_or("");
|
||||
let trimmed = head.trim().as_bytes();
|
||||
|
||||
// For chained calls like `r.URL.Query().Get`, also strip internal
|
||||
// `().` segments to produce a normalized form like `r.URL.Query.Get`.
|
||||
let full_normalized = normalize_chained_call(text);
|
||||
let full_norm_bytes = full_normalized.as_bytes();
|
||||
|
||||
// ── Check runtime (config) rules first — they take priority ──────
|
||||
if let Some(extras) = extra {
|
||||
// Pass 1: exact / suffix
|
||||
|
|
@ -384,12 +388,8 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
if m.last() == Some(&b'_') {
|
||||
continue;
|
||||
}
|
||||
if ends_with_ignore_case(trimmed, m) {
|
||||
let start = trimmed.len() - m.len();
|
||||
let ok = start == 0 || matches!(trimmed[start - 1], b'.' | b':');
|
||||
if ok {
|
||||
return Some(rule.label);
|
||||
}
|
||||
if match_suffix(trimmed, m) || match_suffix(full_norm_bytes, m) {
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -397,7 +397,10 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
for rule in extras {
|
||||
for raw in &rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') && starts_with_ignore_case(trimmed, m) {
|
||||
if m.last() == Some(&b'_')
|
||||
&& (starts_with_ignore_case(trimmed, m)
|
||||
|| starts_with_ignore_case(full_norm_bytes, m))
|
||||
{
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
|
|
@ -417,12 +420,8 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
if m.last() == Some(&b'_') {
|
||||
continue;
|
||||
}
|
||||
if ends_with_ignore_case(trimmed, m) {
|
||||
let start = trimmed.len() - m.len();
|
||||
let ok = start == 0 || matches!(trimmed[start - 1], b'.' | b':');
|
||||
if ok {
|
||||
return Some(rule.label);
|
||||
}
|
||||
if match_suffix(trimmed, m) || match_suffix(full_norm_bytes, m) {
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -431,7 +430,10 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
for rule in *rules {
|
||||
for raw in rule.matchers {
|
||||
let m = raw.as_bytes();
|
||||
if m.last() == Some(&b'_') && starts_with_ignore_case(trimmed, m) {
|
||||
if m.last() == Some(&b'_')
|
||||
&& (starts_with_ignore_case(trimmed, m)
|
||||
|| starts_with_ignore_case(full_norm_bytes, m))
|
||||
{
|
||||
return Some(rule.label);
|
||||
}
|
||||
}
|
||||
|
|
@ -440,6 +442,58 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
None
|
||||
}
|
||||
|
||||
/// Check if `text` ends with `matcher` at a word boundary (`.` or `:`).
|
||||
#[inline]
|
||||
fn match_suffix(text: &[u8], matcher: &[u8]) -> bool {
|
||||
if ends_with_ignore_case(text, matcher) {
|
||||
let start = text.len() - matcher.len();
|
||||
start == 0 || matches!(text[start - 1], b'.' | b':')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalize a chained method call by dropping call parentheses.
///
/// A parenthesized group (nested groups included) is removed when it is
/// immediately followed by `.` or when it runs to the end of `text`; the
/// latter also covers an unterminated `(`. A group followed by anything else
/// is kept verbatim. Scanning stops at the first `<` that is not inside a
/// removed group, so generic arguments are never part of the result.
///
/// e.g. `r.URL.Query().Get` → `r.URL.Query.Get`
/// e.g. `r.URL.Query().Get("host")` → `r.URL.Query.Get`
/// e.g. `f(a)b` → `f(a)b` (group kept: not followed by `.` or end of text)
///
/// Text is copied as `&str` slices rather than byte-by-byte, so non-ASCII
/// identifiers pass through unchanged instead of being re-encoded.
fn normalize_chained_call(text: &str) -> String {
    let bytes = text.as_bytes();
    let mut result = String::with_capacity(text.len());
    // Start of the run of input that is kept but not yet copied to `result`.
    // It only ever sits at 0 or just past an ASCII `)`/`(`/end of input, and
    // runs are only cut at ASCII `(` or `<`, so every slice below falls on a
    // UTF-8 character boundary.
    let mut run_start = 0;
    let mut i = 0;

    while i < bytes.len() {
        match bytes[i] {
            b'(' => {
                // Walk to just past the matching `)`, or to the end of the
                // input when the group is never closed.
                let mut depth = 1u32;
                let mut j = i + 1;
                while j < bytes.len() && depth > 0 {
                    match bytes[j] {
                        b'(' => depth += 1,
                        b')' => depth -= 1,
                        _ => {}
                    }
                    j += 1;
                }

                if j >= bytes.len() || bytes[j] == b'.' {
                    // Drop the whole group: flush what precedes it and
                    // resume copying right after it.
                    result.push_str(&text[run_start..i]);
                    run_start = j;
                    i = j;
                } else {
                    // Unusual case: keep the `(` and keep scanning the
                    // group's contents as ordinary text.
                    i += 1;
                }
            }
            b'<' => {
                // Stop at generic args; everything from here on is discarded.
                result.push_str(&text[run_start..i]);
                return result;
            }
            _ => i += 1,
        }
    }

    result.push_str(&text[run_start..]);
    result
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -3,8 +3,24 @@ use phf::{Map, phf_map};
|
|||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
// ─────────── Sources ───────────
|
||||
// Note: PHP `$` prefix is stripped by collect_idents, so match without `$`.
|
||||
LabelRule {
|
||||
matchers: &["$_GET", "$_POST", "$_REQUEST", "$_COOKIE"],
|
||||
matchers: &[
|
||||
"$_GET",
|
||||
"_GET",
|
||||
"$_POST",
|
||||
"_POST",
|
||||
"$_REQUEST",
|
||||
"_REQUEST",
|
||||
"$_COOKIE",
|
||||
"_COOKIE",
|
||||
"$_FILES",
|
||||
"_FILES",
|
||||
"$_SERVER",
|
||||
"_SERVER",
|
||||
"$_ENV",
|
||||
"_ENV",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
|
|
@ -20,17 +36,44 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["escapeshellarg", "escapeshellcmd"],
|
||||
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["basename"],
|
||||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
},
|
||||
// ─────────── Sinks ─────────────
|
||||
LabelRule {
|
||||
matchers: &["system", "exec", "passthru", "shell_exec"],
|
||||
matchers: &[
|
||||
"system",
|
||||
"exec",
|
||||
"passthru",
|
||||
"shell_exec",
|
||||
"proc_open",
|
||||
"popen",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["eval", "assert"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["include", "include_once", "require", "require_once"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["unserialize"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["move_uploaded_file", "copy", "file_put_contents", "fwrite"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["echo", "print"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["mysqli_query", "pg_query"],
|
||||
matchers: &["mysqli_query", "pg_query", "query"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
];
|
||||
|
|
@ -41,16 +84,29 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"while_statement" => Kind::While,
|
||||
"for_statement" => Kind::For,
|
||||
"foreach_statement" => Kind::For,
|
||||
"do_statement" => Kind::While,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"throw_expression" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_statement" => Kind::Continue,
|
||||
|
||||
// structure
|
||||
"program" => Kind::SourceFile,
|
||||
"compound_statement" => Kind::Block,
|
||||
"else_clause" => Kind::Block,
|
||||
"else_if_clause" => Kind::Block,
|
||||
"function_definition" => Kind::Function,
|
||||
"method_declaration" => Kind::Function,
|
||||
"switch_statement" => Kind::Block,
|
||||
"switch_block" => Kind::Block,
|
||||
"case_statement" => Kind::Block,
|
||||
"default_statement" => Kind::Block,
|
||||
"try_statement" => Kind::Block,
|
||||
"catch_clause" => Kind::Block,
|
||||
"finally_clause" => Kind::Block,
|
||||
"colon_block" => Kind::Block,
|
||||
"class_declaration" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"function_call_expression" => Kind::CallFn,
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
LabelRule {
|
||||
matchers: &["open"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
|
|
@ -65,6 +65,14 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["cursor.execute", "cursor.executemany"],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["send_file", "send_from_directory"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["os.path.realpath"],
|
||||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
@ -74,13 +82,24 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"for_statement" => Kind::For,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"raise_statement" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_statement" => Kind::Continue,
|
||||
|
||||
// structure
|
||||
"module" => Kind::SourceFile,
|
||||
"block" => Kind::Block,
|
||||
"else_clause" => Kind::Block,
|
||||
"elif_clause" => Kind::Block,
|
||||
"with_statement" => Kind::Block,
|
||||
"function_definition" => Kind::Function,
|
||||
"try_statement" => Kind::Block,
|
||||
"except_clause" => Kind::Block,
|
||||
"finally_clause" => Kind::Block,
|
||||
"class_definition" => Kind::Block,
|
||||
"decorated_definition" => Kind::Block,
|
||||
"match_statement" => Kind::Block,
|
||||
"case_clause" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"call" => Kind::CallFn,
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"if" => Kind::If,
|
||||
"unless" => Kind::If,
|
||||
"while" => Kind::While,
|
||||
"until" => Kind::While,
|
||||
"for" => Kind::For,
|
||||
|
||||
"return" => Kind::Return,
|
||||
|
|
@ -49,15 +50,26 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
// structure
|
||||
"program" => Kind::SourceFile,
|
||||
"body_statement" => Kind::Block,
|
||||
"do_block" => Kind::Block,
|
||||
"do_block" => Kind::Function,
|
||||
"then" => Kind::Block,
|
||||
"else" => Kind::Block,
|
||||
"elsif" => Kind::If,
|
||||
|
||||
"begin" => Kind::Block,
|
||||
"rescue" => Kind::Block,
|
||||
"ensure" => Kind::Block,
|
||||
"case" => Kind::Block,
|
||||
"when" => Kind::Block,
|
||||
"class" => Kind::Block,
|
||||
"module" => Kind::Block,
|
||||
"do" => Kind::Block,
|
||||
"block" => Kind::Function,
|
||||
|
||||
// data-flow
|
||||
"call" => Kind::CallFn,
|
||||
"method_call" => Kind::CallFn,
|
||||
"assignment" => Kind::Assignment,
|
||||
"method" => Kind::Function,
|
||||
"singleton_method" => Kind::Function,
|
||||
|
||||
// trivia
|
||||
"comment" => Kind::Trivia,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["fs::read_to_string", "source_file"],
|
||||
matchers: &["source_file"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
|
|
@ -36,17 +36,29 @@ pub static RULES: &[LabelRule] = &[
|
|||
matchers: &["sink_html"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"fs::read_to_string",
|
||||
"fs::write",
|
||||
"fs::read",
|
||||
"File::open",
|
||||
"File::create",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_expression" => Kind::If,
|
||||
"loop_expression" => Kind::InfiniteLoop,
|
||||
"loop_statement" => Kind::LoopBody,
|
||||
"while_statement" => Kind::While,
|
||||
"while_expression" => Kind::While,
|
||||
"for_statement" => Kind::For,
|
||||
"for_expression" => Kind::For,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"return_expression" => Kind::Return,
|
||||
"break_expression" => Kind::Break,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_expression" => Kind::Continue,
|
||||
|
|
@ -55,7 +67,17 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
// structure
|
||||
"source_file" => Kind::SourceFile,
|
||||
"block" => Kind::Block,
|
||||
"else_clause" => Kind::Block,
|
||||
"match_expression" => Kind::Block,
|
||||
"match_block" => Kind::Block,
|
||||
"match_arm" => Kind::Block,
|
||||
"unsafe_block" => Kind::Block,
|
||||
"function_item" => Kind::Function,
|
||||
"closure_expression" => Kind::Block,
|
||||
"async_block" => Kind::Block,
|
||||
"impl_item" => Kind::Block,
|
||||
"trait_item" => Kind::Block,
|
||||
"declaration_list" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"call_expression" => Kind::CallFn,
|
||||
|
|
|
|||
|
|
@ -50,18 +50,36 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"while_statement" => Kind::While,
|
||||
"for_statement" => Kind::For,
|
||||
"for_in_statement" => Kind::For,
|
||||
"for_of_statement" => Kind::For,
|
||||
"do_statement" => Kind::While,
|
||||
|
||||
"return_statement" => Kind::Return,
|
||||
"throw_statement" => Kind::Return,
|
||||
"break_statement" => Kind::Break,
|
||||
"continue_statement" => Kind::Continue,
|
||||
|
||||
// structure
|
||||
"program" => Kind::SourceFile,
|
||||
"statement_block" => Kind::Block,
|
||||
"else_clause" => Kind::Block,
|
||||
"function_declaration" => Kind::Function,
|
||||
"function_expression" => Kind::Function,
|
||||
"arrow_function" => Kind::Function,
|
||||
"method_definition" => Kind::Function,
|
||||
"generator_function_declaration" => Kind::Function,
|
||||
"generator_function" => Kind::Function,
|
||||
"switch_statement" => Kind::Block,
|
||||
"switch_body" => Kind::Block,
|
||||
"switch_case" => Kind::Block,
|
||||
"switch_default" => Kind::Block,
|
||||
"try_statement" => Kind::Block,
|
||||
"catch_clause" => Kind::Block,
|
||||
"finally_clause" => Kind::Block,
|
||||
"class_declaration" => Kind::Block,
|
||||
"class" => Kind::Block,
|
||||
"class_body" => Kind::Block,
|
||||
"abstract_class_declaration" => Kind::Block,
|
||||
"export_statement" => Kind::Block,
|
||||
"enum_declaration" => Kind::Trivia,
|
||||
|
||||
// data-flow
|
||||
"call_expression" => Kind::CallFn,
|
||||
|
|
|
|||
49
src/lib.rs
49
src/lib.rs
|
|
@ -1,19 +1,62 @@
|
|||
// Re-exports for benchmarks and integration tests.
|
||||
// The binary crate (main.rs) is the primary entry point; this lib target
|
||||
// exposes internals for criterion and other tooling.
|
||||
//! # Nyx Scanner
|
||||
//!
|
||||
//! A multi-language static vulnerability scanner. Nyx parses source files with
|
||||
//! [tree-sitter](https://tree-sitter.github.io/), builds intra-procedural
|
||||
//! control-flow graphs ([petgraph](https://docs.rs/petgraph)), and runs
|
||||
//! cross-file taint analysis with a capability-based sanitizer system.
|
||||
//!
|
||||
//! ## Architecture
|
||||
//!
|
||||
//! Nyx uses a **two-pass architecture**:
|
||||
//!
|
||||
//! 1. **Pass 1 — Summary extraction**: Parse each file, build a CFG per function,
|
||||
//! and export a [`summary::FuncSummary`] capturing source/sanitizer/sink capabilities,
|
||||
//! taint propagation behavior, and callee lists. Summaries are persisted to SQLite.
|
||||
//!
|
||||
//! 2. **Pass 2 — Analysis**: Load all summaries into a [`summary::GlobalSummaries`] map,
|
||||
//! re-parse files, and run taint analysis with cross-file callee resolution. CFG
|
||||
//! structural analysis checks for auth gaps, unguarded sinks, and resource leaks.
|
||||
//!
|
||||
//! ## Four Detector Families
|
||||
//!
|
||||
//! - **Taint** ([`taint`]) — Monotone forward dataflow tracking source-to-sink flows
|
||||
//! - **CFG Structural** ([`cfg_analysis`]) — Dominator-based guard and auth-gap detection
|
||||
//! - **State Model** ([`state`]) — Resource lifecycle and authentication state lattices
|
||||
//! - **AST Patterns** ([`patterns`]) — Tree-sitter structural queries per language
|
||||
//!
|
||||
//! ## Supported Languages
|
||||
//!
|
||||
//! Rust, C, C++, Java, Go, PHP, Python, Ruby, TypeScript, JavaScript.
|
||||
//!
|
||||
//! ## Entry Points
|
||||
//!
|
||||
//! - [`scan_no_index`] — Run a two-pass scan without indexing (for tests)
|
||||
//! - [`commands::scan::scan_filesystem`] — Filesystem scan with optional indexing
|
||||
//! - [`commands::scan::scan_with_index_parallel`] — Index-backed parallel scan
|
||||
//!
|
||||
//! ## Documentation
|
||||
//!
|
||||
//! See the [`docs/`](https://github.com/elicpeter/nyx/tree/master/docs) directory
|
||||
//! for user and contributor documentation.
|
||||
|
||||
pub mod ast;
|
||||
pub mod callgraph;
|
||||
pub mod cfg;
|
||||
pub mod cfg_analysis;
|
||||
pub(crate) mod cli;
|
||||
pub mod commands;
|
||||
pub mod database;
|
||||
pub mod errors;
|
||||
pub mod evidence;
|
||||
pub mod fmt;
|
||||
pub mod interop;
|
||||
pub mod labels;
|
||||
pub mod output;
|
||||
pub mod patterns;
|
||||
pub mod rank;
|
||||
pub mod state;
|
||||
pub mod summary;
|
||||
pub mod suppress;
|
||||
pub mod symbol;
|
||||
pub mod taint;
|
||||
pub mod utils;
|
||||
|
|
|
|||
16
src/main.rs
16
src/main.rs
|
|
@ -1,15 +1,21 @@
|
|||
mod ast;
|
||||
mod callgraph;
|
||||
mod cfg;
|
||||
mod cfg_analysis;
|
||||
mod cli;
|
||||
mod commands;
|
||||
mod database;
|
||||
mod errors;
|
||||
mod evidence;
|
||||
mod fmt;
|
||||
mod interop;
|
||||
mod labels;
|
||||
mod output;
|
||||
mod patterns;
|
||||
mod rank;
|
||||
mod state;
|
||||
mod summary;
|
||||
mod suppress;
|
||||
mod symbol;
|
||||
mod taint;
|
||||
mod utils;
|
||||
|
|
@ -25,7 +31,7 @@ use std::fs;
|
|||
use std::time::Instant;
|
||||
use tracing_subscriber::fmt::time;
|
||||
use tracing_subscriber::prelude::*;
|
||||
use tracing_subscriber::{EnvFilter, Registry, fmt};
|
||||
use tracing_subscriber::{EnvFilter, Registry, fmt as tracing_fmt};
|
||||
// use tracing_appender::rolling::{RollingFileAppender, Rotation};
|
||||
// use tracing_appender::non_blocking;
|
||||
|
||||
|
|
@ -33,7 +39,7 @@ fn init_tracing() {
|
|||
// let file_appender = RollingFileAppender::new(Rotation::HOURLY, "logs", "nyx-scanner.log");
|
||||
// let (file_writer, guard) = non_blocking(file_appender);
|
||||
|
||||
let fmt_layer = fmt::layer()
|
||||
let fmt_layer = tracing_fmt::layer()
|
||||
.pretty()
|
||||
.with_thread_ids(true)
|
||||
.with_timer(time::UtcTime::rfc_3339());
|
||||
|
|
@ -56,8 +62,8 @@ fn main() -> NyxResult<()> {
|
|||
tracing::debug!("CLI starting up");
|
||||
let cli = Cli::parse();
|
||||
|
||||
let proj_dirs = ProjectDirs::from("dev", "ecpeter23", "nyx")
|
||||
.ok_or("Unable to determine project directories")?;
|
||||
let proj_dirs =
|
||||
ProjectDirs::from("", "", "nyx").ok_or("Unable to determine project directories")?;
|
||||
|
||||
// todo: check if we want to actually build a config file, maybe some environments will not want to have anything written
|
||||
let config_dir = proj_dirs.config_dir();
|
||||
|
|
@ -83,7 +89,7 @@ fn main() -> NyxResult<()> {
|
|||
commands::handle_command(cli.command, database_dir, config_dir, &mut config)?;
|
||||
|
||||
if !quiet {
|
||||
println!(
|
||||
eprintln!(
|
||||
"{} in {:.3}s.",
|
||||
style("Finished").green().bold(),
|
||||
now.elapsed().as_secs_f32()
|
||||
|
|
|
|||
|
|
@ -38,6 +38,11 @@ fn cfg_rule_description(id: &str) -> Option<&'static str> {
|
|||
}
|
||||
"cfg-resource-leak" => Some("Resource acquired but not released on all exit paths"),
|
||||
"cfg-lock-not-released" => Some("Lock acquired but not released on all exit paths"),
|
||||
"state-use-after-close" => Some("Variable used after its resource handle was closed"),
|
||||
"state-double-close" => Some("Resource handle closed more than once"),
|
||||
"state-resource-leak" => Some("Resource acquired but never closed"),
|
||||
"state-resource-leak-possible" => Some("Resource may not be closed on all paths"),
|
||||
"state-unauthed-access" => Some("Sensitive operation reached without authentication"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -116,11 +121,17 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
.map(|p| p.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|_| d.path.clone());
|
||||
|
||||
json!({
|
||||
// Prefer the per-finding message (e.g. from state analysis) over the generic rule description.
|
||||
let msg_text = d
|
||||
.message
|
||||
.as_deref()
|
||||
.unwrap_or_else(|| rule_description(base));
|
||||
|
||||
let mut result = json!({
|
||||
"ruleId": base,
|
||||
"ruleIndex": rule_index,
|
||||
"level": severity_to_level(d.severity),
|
||||
"message": { "text": rule_description(base) },
|
||||
"message": { "text": msg_text },
|
||||
"locations": [{
|
||||
"physicalLocation": {
|
||||
"artifactLocation": { "uri": uri },
|
||||
|
|
@ -130,7 +141,50 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
}
|
||||
}
|
||||
}]
|
||||
})
|
||||
});
|
||||
|
||||
// Build properties object
|
||||
let mut props = serde_json::Map::new();
|
||||
props.insert("category".into(), json!(d.category.to_string()));
|
||||
if let Some(conf) = d.confidence {
|
||||
props.insert("confidence".into(), json!(conf.to_string()));
|
||||
}
|
||||
|
||||
// Add rollup data if present
|
||||
if let Some(ref rollup) = d.rollup {
|
||||
props.insert(
|
||||
"rollup".into(),
|
||||
json!({
|
||||
"count": rollup.count,
|
||||
}),
|
||||
);
|
||||
|
||||
// Add rollup occurrences as relatedLocations
|
||||
let related: Vec<Value> = rollup
|
||||
.occurrences
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, loc)| {
|
||||
json!({
|
||||
"id": idx,
|
||||
"physicalLocation": {
|
||||
"artifactLocation": { "uri": &uri },
|
||||
"region": {
|
||||
"startLine": loc.line,
|
||||
"startColumn": loc.col
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
if !related.is_empty() {
|
||||
result["relatedLocations"] = json!(related);
|
||||
}
|
||||
}
|
||||
|
||||
result["properties"] = Value::Object(props);
|
||||
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,40 +1,95 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// C AST patterns.
|
||||
///
|
||||
/// Taint rules cover `system`/`popen`/`exec*` (command injection),
|
||||
/// `sprintf`/`strcpy`/`strcat` (buffer overflow sinks), and `printf`/`fprintf`
|
||||
/// (format-string sinks). AST patterns here focus on **banned-by-default
|
||||
/// functions** (`gets`, `scanf %s`) and **format-string** variants not covered
|
||||
/// by taint, since these are dangerous regardless of data origin.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Banned functions (always dangerous) ────────────────────
|
||||
Pattern {
|
||||
id: "strcpy_call",
|
||||
description: "strcpy() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
|
||||
id: "c.memory.gets",
|
||||
description: "gets() — no bounds checking, always exploitable",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "gets")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "strcat_call",
|
||||
description: "strcat() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
|
||||
id: "c.memory.strcpy",
|
||||
description: "strcpy() — no bounds checking on destination buffer",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "strcpy")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "sprintf_call",
|
||||
description: "sprintf() (no length limit)",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
|
||||
id: "c.memory.strcat",
|
||||
description: "strcat() — no bounds checking on destination buffer",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "strcat")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "gets_call",
|
||||
description: "gets() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
|
||||
id: "c.memory.sprintf",
|
||||
description: "sprintf() — no length limit on output buffer",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "sprintf")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "scanf_with_percent_s",
|
||||
description: "scanf(\"%s\") without length specifier",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"scanf\") arguments: (argument_list (string_literal) @fmt (#match? @fmt \".*%s.*\"))) @vuln",
|
||||
id: "c.memory.scanf_percent_s",
|
||||
description: "scanf(\"%s\") — unbounded string read",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#eq? @id "scanf")
|
||||
arguments: (argument_list
|
||||
(string_literal) @fmt (#match? @fmt "%s")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "c.cmdi.system",
|
||||
description: "system() — shell command execution",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "system")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "system_call",
|
||||
description: "system() shell execution",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
|
||||
id: "c.cmdi.popen",
|
||||
description: "popen() — shell command execution with pipe",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "popen")) @vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier B: Format-string (variable first arg) ─────────────────────
|
||||
Pattern {
|
||||
id: "c.memory.printf_no_fmt",
|
||||
description: "printf(var) — format-string vulnerability when first arg is not literal",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#eq? @id "printf")
|
||||
arguments: (argument_list
|
||||
. (identifier) @arg))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,40 +1,106 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// C++ AST patterns.
|
||||
///
|
||||
/// Inherits C banned-function concerns plus C++-specific patterns like
|
||||
/// `reinterpret_cast` and `const_cast`. Taint rules overlap with C rules
|
||||
/// for `system`/`sprintf`/`strcpy`/`strcat`.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Banned C functions (inherited) ─────────────────────────
|
||||
Pattern {
|
||||
id: "strcpy_call",
|
||||
description: "strcpy() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcpy\")) @vuln",
|
||||
id: "cpp.memory.gets",
|
||||
description: "gets() — no bounds checking, always exploitable",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "gets")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "strcat_call",
|
||||
description: "strcat() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"strcat\")) @vuln",
|
||||
id: "cpp.memory.strcpy",
|
||||
description: "strcpy() — no bounds checking on destination buffer",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "strcpy")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "sprintf_call",
|
||||
description: "sprintf() (no length limit)",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"sprintf\")) @vuln",
|
||||
id: "cpp.memory.strcat",
|
||||
description: "strcat() — no bounds checking on destination buffer",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "strcat")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "gets_call",
|
||||
description: "gets() usage",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"gets\")) @vuln",
|
||||
id: "cpp.memory.sprintf",
|
||||
description: "sprintf() — no length limit on output buffer",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "sprintf")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "cpp.cmdi.system",
|
||||
description: "system() — shell command execution",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "system")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "system_call",
|
||||
description: "system() shell execution",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"system\")) @vuln",
|
||||
id: "cpp.cmdi.popen",
|
||||
description: "popen() — shell command execution",
|
||||
query: r#"(call_expression function: (identifier) @id (#eq? @id "popen")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Dangerous casts ────────────────────────────────────────
|
||||
// C++ casts are parsed as call_expression with template_function
|
||||
Pattern {
|
||||
id: "cpp.memory.reinterpret_cast",
|
||||
description: "reinterpret_cast — type-punning cast",
|
||||
query: r#"(call_expression
|
||||
function: (template_function
|
||||
name: (identifier) @n (#eq? @n "reinterpret_cast")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "reinterpret_cast",
|
||||
description: "reinterpret_cast usage",
|
||||
query: "(reinterpret_cast_expression) @vuln",
|
||||
id: "cpp.memory.const_cast",
|
||||
description: "const_cast — removes const/volatile qualifier",
|
||||
query: r#"(call_expression
|
||||
function: (template_function
|
||||
name: (identifier) @n (#eq? @n "const_cast")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier B: Format-string (variable first arg) ─────────────────────
|
||||
Pattern {
|
||||
id: "cpp.memory.printf_no_fmt",
|
||||
description: "printf(var) — format-string vulnerability when first arg is not literal",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#eq? @id "printf")
|
||||
arguments: (argument_list
|
||||
. (identifier) @arg))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,34 +1,120 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// Go AST patterns.
|
||||
///
|
||||
/// Taint rules cover `exec.Command` (command injection), `db.Query`/`db.Exec`
|
||||
/// (SQL sinks). AST patterns here focus on **TLS misconfiguration**,
|
||||
/// **weak crypto**, **unsafe.Pointer**, and **hardcoded secrets**.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "exec_command",
|
||||
description: "os/exec Command construction",
|
||||
query: "(call_expression function: (selector_expression field: (field_identifier) @f (#eq? @f \"Command\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "http_insecure_tls",
|
||||
description: "&http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}",
|
||||
query: "(composite_literal type: (selector_expression field: (field_identifier) @t (#eq? @t \"Transport\")) body: (literal_value (keyed_element key: (identifier) @k (#eq? @k \"TLSClientConfig\") value: (composite_literal body: (literal_value (keyed_element key: (identifier) @ik (#eq? @ik \"InsecureSkipVerify\") value: (true)))))) @vuln",
|
||||
id: "go.cmdi.exec_command",
|
||||
description: "exec.Command() — arbitrary process execution",
|
||||
query: r#"(call_expression
|
||||
function: (selector_expression
|
||||
field: (field_identifier) @f (#eq? @f "Command")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Unsafe pointer ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "unsafe_pointer",
|
||||
description: "Use of unsafe.Pointer",
|
||||
query: "(qualified_type type: (selector_expression field: (field_identifier) @f (#eq? @f \"Pointer\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "md5_sha1",
|
||||
description: "crypto/md5 or crypto/sha1 usage",
|
||||
query: "(call_expression function: (selector_expression object: (identifier) @pkg (#match? @pkg \"md5|sha1\"))) @vuln",
|
||||
id: "go.memory.unsafe_pointer",
|
||||
description: "unsafe.Pointer — bypasses Go type system",
|
||||
query: r#"(call_expression
|
||||
function: (selector_expression
|
||||
operand: (identifier) @pkg (#eq? @pkg "unsafe")
|
||||
field: (field_identifier) @f (#eq? @f "Pointer")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: TLS misconfiguration ───────────────────────────────────
|
||||
Pattern {
|
||||
id: "hardcoded_secret",
|
||||
description: "Hard-coded string that looks like an API key/token",
|
||||
query: "(interpreted_string_literal) @s (#match? @s \"(?i)(api|secret|token|password)[=:]?[ \\t]*[A-Za-z0-9_\\-]{8,}\")",
|
||||
id: "go.transport.insecure_skip_verify",
|
||||
description: "InsecureSkipVerify: true — disables TLS certificate validation",
|
||||
query: r#"(keyed_element
|
||||
(literal_element
|
||||
(identifier) @k (#eq? @k "InsecureSkipVerify"))
|
||||
(literal_element (true)))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::InsecureTransport,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Weak crypto ────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "go.crypto.md5",
|
||||
description: "md5.New() / md5.Sum() — weak hash algorithm",
|
||||
query: r#"(call_expression
|
||||
function: (selector_expression
|
||||
operand: (identifier) @pkg (#eq? @pkg "md5")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "go.crypto.sha1",
|
||||
description: "sha1.New() / sha1.Sum() — weak hash algorithm",
|
||||
query: r#"(call_expression
|
||||
function: (selector_expression
|
||||
operand: (identifier) @pkg (#eq? @pkg "sha1")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier B: SQL injection (concatenation heuristic) ────────────────
|
||||
Pattern {
|
||||
id: "go.sqli.query_concat",
|
||||
description: "db.Query/Exec with concatenated string argument",
|
||||
query: r#"(call_expression
|
||||
function: (selector_expression
|
||||
field: (field_identifier) @f (#match? @f "^(Query|Exec|QueryRow)$"))
|
||||
arguments: (argument_list
|
||||
(binary_expression) @concat))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::SqlInjection,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Hardcoded secrets ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "go.secrets.hardcoded_key",
|
||||
description: "Variable with secret-like name assigned a string literal",
|
||||
query: r#"(short_var_declaration
|
||||
left: (expression_list
|
||||
(identifier) @name (#match? @name "(?i)(password|secret|api_?key|token|private_?key)"))
|
||||
right: (expression_list
|
||||
(interpreted_string_literal) @val))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Secrets,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Deserialization ────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "go.deser.gob_decode",
|
||||
description: "gob.NewDecoder — Go binary deserialization",
|
||||
query: r#"(call_expression
|
||||
function: (selector_expression
|
||||
operand: (identifier) @pkg (#eq? @pkg "gob")
|
||||
field: (field_identifier) @f (#eq? @f "NewDecoder")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,40 +1,116 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// Java AST patterns.
|
||||
///
|
||||
/// Taint rules cover `Runtime.exec` (command injection) and
|
||||
/// `executeQuery`/`executeUpdate`/`prepareStatement` (SQL sinks).
|
||||
/// AST patterns here focus on **deserialization**, **reflection**,
|
||||
/// **SQL with concatenation** (Tier B heuristic), and **weak crypto**.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Deserialization ────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "runtime_exec",
|
||||
description: "Runtime.getRuntime().exec(...) – arbitrary-command execution",
|
||||
query: "(method_invocation object: (method_invocation name: (identifier) @n (#eq? @n \"getRuntime\")) name: (identifier) @id (#eq? @id \"exec\")) @vuln",
|
||||
id: "java.deser.readobject",
|
||||
description: "ObjectInputStream.readObject() — unsafe deserialization",
|
||||
// Match any .readObject() call — the method name is specific enough.
|
||||
query: r#"(method_invocation
|
||||
name: (identifier) @id (#eq? @id "readObject"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "class_for_name",
|
||||
description: "Dynamic reflection via Class.forName(...)",
|
||||
query: "(method_invocation object: (identifier) @c (#eq? @c \"Class\") name: (identifier) @id (#eq? @id \"forName\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "object_deserialization",
|
||||
description: "java.io.ObjectInputStream#readObject() deserialization",
|
||||
query: "(method_invocation object: (identifier) @o (#eq? @o \"ObjectInputStream\") name: (identifier) @id (#eq? @id \"readObject\")) @vuln",
|
||||
id: "java.cmdi.runtime_exec",
|
||||
description: "Runtime.getRuntime().exec() — shell command execution",
|
||||
query: r#"(method_invocation
|
||||
object: (method_invocation
|
||||
name: (identifier) @n (#eq? @n "getRuntime"))
|
||||
name: (identifier) @id (#eq? @id "exec"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Reflection ─────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "insecure_random",
|
||||
description: "java.util.Random used where SecureRandom is expected",
|
||||
query: "(object_creation_expression type: (identifier) @t (#eq? @t \"Random\")) @vuln",
|
||||
id: "java.reflection.class_forname",
|
||||
description: "Class.forName() — dynamic class loading",
|
||||
query: r#"(method_invocation
|
||||
object: (identifier) @c (#eq? @c "Class")
|
||||
name: (identifier) @id (#eq? @id "forName"))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Reflection,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "thread_stop",
|
||||
description: "Deprecated Thread.stop() invocation",
|
||||
query: "(method_invocation name: (identifier) @id (#eq? @id \"stop\") object: (identifier) @obj (#eq? @obj \"Thread\")) @vuln",
|
||||
id: "java.reflection.method_invoke",
|
||||
description: "Method.invoke() — reflective method invocation",
|
||||
query: r#"(method_invocation
|
||||
name: (identifier) @id (#eq? @id "invoke"))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Reflection,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier B: SQL injection (concatenation heuristic) ────────────────
|
||||
Pattern {
|
||||
id: "java.sqli.execute_concat",
|
||||
description: "SQL execute with concatenated string argument",
|
||||
query: r#"(method_invocation
|
||||
name: (identifier) @id (#match? @id "^execute(Query|Update)?$")
|
||||
arguments: (argument_list
|
||||
(binary_expression) @concat))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::SqlInjection,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Weak crypto ────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "java.crypto.insecure_random",
|
||||
description: "new Random() — java.util.Random is not cryptographically secure",
|
||||
query: r#"(object_creation_expression
|
||||
type: (type_identifier) @t (#eq? @t "Random"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "sql_concat",
|
||||
description: "SQL built with string concatenation",
|
||||
query: "(method_invocation name: (identifier) @id (#match? @id \"execute(Query|Update)?\") arguments: (argument_list (binary_expression) @concat)) @vuln",
|
||||
id: "java.crypto.weak_digest",
|
||||
description: "MessageDigest.getInstance(\"MD5\"/\"SHA1\") — weak hash algorithm",
|
||||
query: r#"(method_invocation
|
||||
object: (identifier) @c (#eq? @c "MessageDigest")
|
||||
name: (identifier) @id (#eq? @id "getInstance")
|
||||
arguments: (argument_list
|
||||
(string_literal) @alg (#match? @alg "(?i)(md5|sha-?1)")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: XSS (servlet) ──────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "java.xss.getwriter_print",
|
||||
description: "response.getWriter().print/println — direct output without encoding",
|
||||
query: r#"(method_invocation
|
||||
object: (method_invocation
|
||||
name: (identifier) @gw (#eq? @gw "getWriter"))
|
||||
name: (identifier) @id (#match? @id "^(print|println|write)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,117 +1,182 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// JavaScript AST patterns.
|
||||
///
|
||||
/// Taint rules cover `eval` (code injection), `innerHTML` (XSS),
|
||||
/// `location.href` (open redirect), and `child_process.exec/spawn` (command
|
||||
/// injection). AST patterns here add **new Function()**, **document.write**,
|
||||
/// **setTimeout with string**, **deserialization**, **prototype pollution**,
|
||||
/// **XSS sinks** not covered by taint, and **weak crypto**.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Code execution ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "Use of eval()",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
|
||||
id: "js.code_exec.eval",
|
||||
description: "eval() — dynamic code execution",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#eq? @id "eval"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "new_function",
|
||||
description: "new Function() constructor",
|
||||
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
|
||||
id: "js.code_exec.new_function",
|
||||
description: "new Function() constructor — eval equivalent",
|
||||
query: r#"(new_expression
|
||||
constructor: (identifier) @id (#eq? @id "Function"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "document_write",
|
||||
description: "document.write() call",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
|
||||
id: "js.code_exec.settimeout_string",
|
||||
description: "setTimeout/setInterval with string argument — implicit eval",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#match? @id "^(setTimeout|setInterval)$")
|
||||
arguments: (arguments (string) @code))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: XSS sinks ──────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "settimeout_string",
|
||||
description: "setTimeout / setInterval with a string argument",
|
||||
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
|
||||
id: "js.xss.document_write",
|
||||
description: "document.write() — XSS sink",
|
||||
query: r#"(call_expression
|
||||
function: (member_expression
|
||||
object: (identifier) @obj (#eq? @obj "document")
|
||||
property: (property_identifier) @prop (#match? @prop "^(write|writeln)$")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "json_parse",
|
||||
description: "JSON.parse on dynamic string",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
|
||||
id: "js.xss.outer_html",
|
||||
description: "Assignment to .outerHTML — XSS sink",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
property: (property_identifier) @prop (#eq? @prop "outerHTML")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "js.xss.insert_adjacent_html",
|
||||
description: "insertAdjacentHTML() — XSS sink",
|
||||
query: r#"(call_expression
|
||||
function: (member_expression
|
||||
property: (property_identifier) @prop (#eq? @prop "insertAdjacentHTML")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Prototype pollution ────────────────────────────────────
|
||||
Pattern {
|
||||
id: "js.prototype.proto_assignment",
|
||||
description: "Assignment to __proto__ — prototype pollution",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
property: (property_identifier) @prop (#eq? @prop "__proto__")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Prototype,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "js.prototype.extend_object",
|
||||
description: "Assignment to Object.prototype — prototype mutation",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
object: (member_expression
|
||||
object: (identifier) @obj (#eq? @obj "Object")
|
||||
property: (property_identifier) @mid (#eq? @mid "prototype"))))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Prototype,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Weak crypto ────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "js.crypto.weak_hash",
|
||||
description: "crypto.createHash with weak algorithm (md5/sha1)",
|
||||
query: r#"(call_expression
|
||||
function: (member_expression
|
||||
property: (property_identifier) @prop (#eq? @prop "createHash"))
|
||||
arguments: (arguments
|
||||
(string) @alg (#match? @alg "\"(md5|sha1)\"")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "outer_html_assignment",
|
||||
description: "Assignment to element.outerHTML",
|
||||
query: "(assignment_expression
|
||||
left: (member_expression
|
||||
property: (property_identifier) @prop
|
||||
(#eq? @prop \"outerHTML\"))) @vuln",
|
||||
id: "js.crypto.math_random",
|
||||
description: "Math.random() — not cryptographically secure",
|
||||
query: r#"(call_expression
|
||||
function: (member_expression
|
||||
object: (identifier) @obj (#eq? @obj "Math")
|
||||
property: (property_identifier) @prop (#eq? @prop "random")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Open redirect ──────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "js.xss.location_assign",
|
||||
description: "Assignment to location/location.href — open redirect",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier) @obj (#match? @obj "^(window|location|document)$")
|
||||
property: (property_identifier) @prop (#match? @prop "^(location|href)$")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Insecure transport ─────────────────────────────────────
|
||||
Pattern {
|
||||
id: "insert_adjacent_html",
|
||||
description: "insertAdjacentHTML() call",
|
||||
query: "(call_expression
|
||||
function: (member_expression
|
||||
property: (property_identifier) @prop
|
||||
(#eq? @prop \"insertAdjacentHTML\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
id: "js.transport.fetch_http",
|
||||
description: "fetch() over plain HTTP",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#eq? @id "fetch")
|
||||
arguments: (arguments
|
||||
(string) @url (#match? @url "^\"http://")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::InsecureTransport,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Cookie manipulation ────────────────────────────────────
|
||||
Pattern {
|
||||
id: "location_href_assignment",
|
||||
description: "Assignment to window.location / location.href",
|
||||
query: "(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier) @obj
|
||||
(#match? @obj \"^(window|location|document|self|top|parent|frames)$\")
|
||||
property: (property_identifier) @prop
|
||||
(#match? @prop \"^(location|href)$\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "cookie_assignment",
|
||||
id: "js.xss.cookie_write",
|
||||
description: "Write to document.cookie",
|
||||
query: "(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier) @obj
|
||||
(#eq? @obj \"document\")
|
||||
property: (property_identifier) @prop
|
||||
(#eq? @prop \"cookie\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "proto_pollution",
|
||||
description: "Assignment to __proto__ (prototype pollution)",
|
||||
query: "(assignment_expression
|
||||
left: (member_expression
|
||||
property: (property_identifier) @prop
|
||||
(#eq? @prop \"__proto__\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "weak_hash_md5",
|
||||
description: "crypto.createHash(\"md5\")",
|
||||
query: "(call_expression
|
||||
function: (member_expression
|
||||
object: (identifier) @obj
|
||||
(#eq? @obj \"crypto\")
|
||||
property: (property_identifier) @prop
|
||||
(#eq? @prop \"createHash\"))
|
||||
arguments: (arguments
|
||||
(string) @alg
|
||||
(#eq? @alg \"md5\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "regexp_constructor_string",
|
||||
description: "new RegExp() with a dynamic string",
|
||||
query: "(new_expression
|
||||
constructor: (identifier) @id
|
||||
(#eq? @id \"RegExp\")
|
||||
arguments: (arguments (string) @pattern)) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "dangerous_extend_builtin",
|
||||
description: "Extending Object.prototype (may lead to collisions/pollution)",
|
||||
query: "(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier) @obj
|
||||
(#eq? @obj \"Object\")
|
||||
property: (property_identifier) @prop
|
||||
(#eq? @prop \"prototype\"))) @vuln",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier) @obj (#eq? @obj "document")
|
||||
property: (property_identifier) @prop (#eq? @prop "cookie")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,3 +1,43 @@
|
|||
//! # AST Pattern Conventions
|
||||
//!
|
||||
//! Each language file exports a `PATTERNS` slice of [`Pattern`] structs.
|
||||
//!
|
||||
//! ## ID format
|
||||
//!
|
||||
//! `<lang>.<category>.<specific>` — e.g. `java.deser.readobject`, `py.cmdi.os_system`.
|
||||
//!
|
||||
//! Language prefixes: `rs`, `java`, `py`, `js`, `ts`, `c`, `cpp`, `go`, `php`, `rb`.
|
||||
//!
|
||||
//! ## Tiers
|
||||
//!
|
||||
//! * **Tier A** — structural presence is high-signal (e.g. `gets()`, `eval()`).
|
||||
//! * **Tier B** — requires a heuristic guard in the query (e.g. SQL with concatenated
|
||||
//! arg, format-string with variable first arg).
|
||||
//!
|
||||
//! ## Severity
|
||||
//!
|
||||
//! * **High** — command exec, deserialization, banned C functions.
|
||||
//! * **Medium** — SQL concat, reflection, XSS sinks, casts.
|
||||
//! * **Low** — weak crypto, insecure randomness, code-quality (`unwrap`/`expect`/`panic`).
|
||||
//!
|
||||
//! Note: the default `min_severity` filter skips Low patterns; they only appear when
|
||||
//! the user explicitly lowers the threshold.
|
||||
//!
|
||||
//! ## No-duplicate rule
|
||||
//!
|
||||
//! If a vulnerability class is already detected by taint analysis (e.g. `eval` as a
|
||||
//! sink, `system` as a sink), the AST pattern is still kept for `--ast-only` mode but
|
||||
//! uses a distinct ID namespace (`js.code_exec.eval` vs `taint-unsanitised-flow`).
|
||||
//! The dedup pass in `ast.rs` prevents exact-duplicate findings at the same location.
|
||||
//!
|
||||
//! ## Adding a new pattern
|
||||
//!
|
||||
//! 1. Pick the language file under `src/patterns/<lang>.rs`.
|
||||
//! 2. Choose tier, category, severity per the rules above.
|
||||
//! 3. Write the tree-sitter query — test with `cargo test --test pattern_tests`.
|
||||
//! 4. Add a snippet to `tests/fixtures/patterns/<lang>/positive.<ext>`.
|
||||
//! 5. Add the ID to the positive test assertion in `tests/pattern_tests.rs`.
|
||||
|
||||
pub mod c;
|
||||
pub mod cpp;
|
||||
mod go;
|
||||
|
|
@ -9,6 +49,7 @@ mod ruby;
|
|||
pub mod rust;
|
||||
pub mod typescript;
|
||||
|
||||
use crate::evidence::Confidence;
|
||||
use console::style;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -16,7 +57,7 @@ use std::collections::HashMap;
|
|||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
|
||||
pub enum Severity {
|
||||
High,
|
||||
Medium,
|
||||
|
|
@ -28,13 +69,14 @@ impl Severity {
|
|||
///
|
||||
/// Returns e.g. `"[HIGH] "` or `"[MEDIUM]"` — always 8 visible characters
|
||||
/// so the column after the tag lines up regardless of severity.
|
||||
#[allow(dead_code)] // public API for lib consumers
|
||||
pub fn colored_tag(self) -> String {
|
||||
// Visible widths: "[HIGH]" = 6, "[MEDIUM]" = 8, "[LOW]" = 5.
|
||||
// Pad the *whole* tag to 8 visible chars (the longest, "[MEDIUM]").
|
||||
let (label, styled_fn): (&str, fn(&str) -> String) = match self {
|
||||
Severity::High => ("HIGH", |s| style(s).red().bold().to_string()),
|
||||
Severity::Medium => ("MEDIUM", |s| style(s).yellow().bold().to_string()),
|
||||
Severity::Low => ("LOW", |s| style(s).cyan().bold().to_string()),
|
||||
Severity::Medium => ("MEDIUM", |s| style(s).color256(208).bold().to_string()),
|
||||
Severity::Low => ("LOW", |s| style(s).color256(67).to_string()),
|
||||
};
|
||||
let bracket_len = label.len() + 2; // "[" + label + "]"
|
||||
let pad = 8usize.saturating_sub(bracket_len);
|
||||
|
|
@ -46,8 +88,8 @@ impl fmt::Display for Severity {
|
|||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let styled = match *self {
|
||||
Severity::High => style("HIGH").red().bold().to_string(),
|
||||
Severity::Medium => style("MEDIUM").yellow().bold().to_string(),
|
||||
Severity::Low => style("LOW").cyan().bold().to_string(),
|
||||
Severity::Medium => style("MEDIUM").color256(208).bold().to_string(),
|
||||
Severity::Low => style("LOW").color256(67).to_string(),
|
||||
};
|
||||
f.write_str(&styled)
|
||||
}
|
||||
|
|
@ -65,14 +107,132 @@ impl Severity {
|
|||
}
|
||||
|
||||
impl FromStr for Severity {
|
||||
// TODO: FIX
|
||||
type Err = ();
|
||||
type Err = String;
|
||||
|
||||
fn from_str(input: &str) -> Result<Self, Self::Err> {
|
||||
match input.to_lowercase().as_str() {
|
||||
"medium" => Ok(Severity::Medium),
|
||||
"high" => Ok(Severity::High),
|
||||
_ => Ok(Severity::Low),
|
||||
match input.trim().to_ascii_uppercase().as_str() {
|
||||
"HIGH" => Ok(Severity::High),
|
||||
"MEDIUM" | "MED" => Ok(Severity::Medium),
|
||||
"LOW" => Ok(Severity::Low),
|
||||
other => Err(format!("unknown severity: '{other}'")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A parsed severity filter expression.
|
||||
///
|
||||
/// Supports three forms:
|
||||
/// - Single level: `"HIGH"` — matches only that level
|
||||
/// - Comma list: `"HIGH,MEDIUM"` — matches any listed level
|
||||
/// - Threshold: `">=MEDIUM"` — matches that level and above
|
||||
///
|
||||
/// Parsing is case-insensitive and tolerates whitespace around tokens.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum SeverityFilter {
|
||||
/// Match findings at or above this level (High >= Medium >= Low).
|
||||
AtLeast(Severity),
|
||||
/// Match findings whose severity is in this exact set.
|
||||
AnyOf(Vec<Severity>),
|
||||
}
|
||||
|
||||
impl SeverityFilter {
|
||||
/// Parse a severity filter expression.
|
||||
///
|
||||
/// Examples: `"HIGH"`, `"high,medium"`, `">=MEDIUM"`, `">= low"`.
|
||||
pub fn parse(expr: &str) -> Result<Self, String> {
|
||||
let trimmed = expr.trim();
|
||||
if trimmed.is_empty() {
|
||||
return Err("empty severity expression".into());
|
||||
}
|
||||
|
||||
// Threshold form: >=LEVEL
|
||||
if let Some(rest) = trimmed.strip_prefix(">=") {
|
||||
let level: Severity = rest.parse()?;
|
||||
return Ok(SeverityFilter::AtLeast(level));
|
||||
}
|
||||
|
||||
// Comma-separated list (also handles single value)
|
||||
let levels: Result<Vec<Severity>, String> = trimmed
|
||||
.split(',')
|
||||
.map(|tok| tok.trim().parse::<Severity>())
|
||||
.collect();
|
||||
let levels = levels?;
|
||||
if levels.is_empty() {
|
||||
return Err("empty severity expression".into());
|
||||
}
|
||||
// Optimise single-value list
|
||||
if levels.len() == 1 {
|
||||
return Ok(SeverityFilter::AnyOf(levels));
|
||||
}
|
||||
Ok(SeverityFilter::AnyOf(levels))
|
||||
}
|
||||
|
||||
/// Returns `true` if the given severity passes this filter.
|
||||
pub fn matches(&self, sev: Severity) -> bool {
|
||||
match self {
|
||||
SeverityFilter::AtLeast(threshold) => {
|
||||
// Severity ordering: High < Medium < Low (derived Ord).
|
||||
// "at least Medium" means sev <= Medium in Ord terms.
|
||||
sev <= *threshold
|
||||
}
|
||||
SeverityFilter::AnyOf(set) => set.contains(&sev),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern confidence tier.
|
||||
///
|
||||
/// * **A** – Structural presence alone is high-signal (e.g. `gets()`, `eval()`).
|
||||
/// * **B** – Requires a simple heuristic guard in the query (e.g. SQL with
|
||||
/// concatenated arg, file-open with non-literal path).
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub enum PatternTier {
|
||||
A,
|
||||
B,
|
||||
}
|
||||
|
||||
/// High-level finding category for noise reduction and prioritization.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
|
||||
pub enum FindingCategory {
|
||||
Security,
|
||||
Reliability,
|
||||
Quality,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FindingCategory {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
FindingCategory::Security => write!(f, "Security"),
|
||||
FindingCategory::Reliability => write!(f, "Reliability"),
|
||||
FindingCategory::Quality => write!(f, "Quality"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Vulnerability class that a pattern detects.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub enum PatternCategory {
|
||||
CommandExec,
|
||||
CodeExec,
|
||||
Deserialization,
|
||||
SqlInjection,
|
||||
PathTraversal,
|
||||
Xss,
|
||||
Crypto,
|
||||
Secrets,
|
||||
InsecureTransport,
|
||||
Reflection,
|
||||
MemorySafety,
|
||||
Prototype,
|
||||
CodeQuality,
|
||||
}
|
||||
|
||||
impl PatternCategory {
|
||||
/// Map this vulnerability class to a high-level finding category.
|
||||
pub fn finding_category(self) -> FindingCategory {
|
||||
match self {
|
||||
PatternCategory::CodeQuality => FindingCategory::Quality,
|
||||
_ => FindingCategory::Security,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -80,7 +240,7 @@ impl FromStr for Severity {
|
|||
/// One AST pattern with a tree-sitter query and meta-data.
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
pub struct Pattern {
|
||||
/// Unique identifier (snake-case preferred).
|
||||
/// Unique identifier — `<lang>.<category>.<specific>` preferred.
|
||||
pub id: &'static str,
|
||||
/// Human-readable explanation.
|
||||
pub description: &'static str,
|
||||
|
|
@ -88,6 +248,12 @@ pub struct Pattern {
|
|||
pub query: &'static str,
|
||||
/// Rough severity bucket.
|
||||
pub severity: Severity,
|
||||
/// Confidence tier (A = structural, B = heuristic-guarded).
|
||||
pub tier: PatternTier,
|
||||
/// Vulnerability class.
|
||||
pub category: PatternCategory,
|
||||
/// Confidence level for findings produced by this pattern.
|
||||
pub confidence: Confidence,
|
||||
}
|
||||
|
||||
/// Global, lazily-initialised registry: lang-name → pattern slice
|
||||
|
|
@ -164,3 +330,66 @@ fn load_returns_correct_pattern_slices() {
|
|||
|
||||
assert!(load("brainfuck").is_empty());
|
||||
}
|
||||
|
||||
/// Parsing a string that names no severity level must fail.
#[test]
fn severity_from_str_rejects_unknown() {
    let parsed: Result<Severity, _> = "garbage".parse();
    assert!(parsed.is_err());
}
|
||||
|
||||
/// A single level name selects exactly that level and nothing else.
#[test]
fn severity_filter_single() {
    let only_high = SeverityFilter::parse("HIGH").unwrap();
    let accepts = |sev: Severity| only_high.matches(sev);

    assert!(accepts(Severity::High));
    assert!(!accepts(Severity::Medium));
    assert!(!accepts(Severity::Low));
}
|
||||
|
||||
/// A comma-separated list selects exactly the listed levels.
#[test]
fn severity_filter_comma_list() {
    let high_or_medium = SeverityFilter::parse("HIGH,MEDIUM").unwrap();
    let accepts = |sev: Severity| high_or_medium.matches(sev);

    assert!(accepts(Severity::High));
    assert!(accepts(Severity::Medium));
    assert!(!accepts(Severity::Low));
}
|
||||
|
||||
/// `>=LEVEL` keeps that level and every more severe one, and drops the rest.
#[test]
fn severity_filter_threshold() {
    let at_least_medium = SeverityFilter::parse(">=MEDIUM").unwrap();
    assert!(at_least_medium.matches(Severity::High));
    assert!(at_least_medium.matches(Severity::Medium));
    assert!(!at_least_medium.matches(Severity::Low));

    let at_least_low = SeverityFilter::parse(">=LOW").unwrap();
    assert!(at_least_low.matches(Severity::High));
    assert!(at_least_low.matches(Severity::Medium));
    assert!(at_least_low.matches(Severity::Low));

    let at_least_high = SeverityFilter::parse(">=HIGH").unwrap();
    assert!(at_least_high.matches(Severity::High));
    assert!(!at_least_high.matches(Severity::Medium));
    // Cover the full range for the strictest threshold too, so a regression
    // that lets the least severe level through cannot go unnoticed.
    assert!(!at_least_high.matches(Severity::Low));
}
|
||||
|
||||
/// Level names are matched case-insensitively and surrounding whitespace is
/// ignored, for both the list form and the threshold form.
#[test]
fn severity_filter_case_insensitive_and_whitespace() {
    let listed = SeverityFilter::parse(" high , medium ").unwrap();
    assert!(listed.matches(Severity::High));
    assert!(listed.matches(Severity::Medium));
    assert!(!listed.matches(Severity::Low));

    let threshold = SeverityFilter::parse(">= medium").unwrap();
    assert!(threshold.matches(Severity::High));
    assert!(threshold.matches(Severity::Medium));
    // Without this check the test would also pass if ">= medium" were parsed
    // as a filter that accepts every level.
    assert!(!threshold.matches(Severity::Low));
}
|
||||
|
||||
/// Empty and whitespace-only filter expressions are parse errors.
#[test]
fn severity_filter_rejects_empty() {
    for blank in ["", " "] {
        assert!(SeverityFilter::parse(blank).is_err());
    }
}
|
||||
|
||||
/// An unknown level name is rejected alone, inside a list, and as a
/// threshold.
#[test]
fn severity_filter_rejects_invalid_level() {
    for expr in ["CRITICAL", "HIGH,CRITICAL", ">=BOGUS"] {
        assert!(SeverityFilter::parse(expr).is_err());
    }
}
|
||||
|
|
|
|||
|
|
@ -1,40 +1,144 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// PHP AST patterns.
|
||||
///
|
||||
/// Taint rules cover `system`/`exec`/`passthru`/`shell_exec` (command
|
||||
/// injection), `echo`/`print` (XSS sinks), and `mysqli_query`/`pg_query`
|
||||
/// (SQL sinks). AST patterns here focus on **eval**, **deserialization**,
|
||||
/// **deprecated dangerous functions**, **include with variable**, and
|
||||
/// **SQL concatenation** (Tier B).
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Code execution ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "eval($code) execution",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"eval\")) @vuln",
|
||||
id: "php.code_exec.eval",
|
||||
description: "eval() — dynamic code execution",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#eq? @n "eval"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "preg_replace_e",
|
||||
description: "preg_replace with deprecated /e modifier",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"preg_replace\") arguments: (arguments (string) @pat (#match? @pat \"/.*e.*$/\"))) @vuln",
|
||||
id: "php.code_exec.create_function",
|
||||
description: "create_function() — deprecated eval-like constructor",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#eq? @n "create_function"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "create_function",
|
||||
description: "create_function(...) anonymous eval-like",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"create_function\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "unserialize_call",
|
||||
description: "unserialize(...) on user input",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"unserialize\")) @vuln",
|
||||
id: "php.code_exec.preg_replace_e",
|
||||
description: "preg_replace with /e modifier — code execution via regex",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#eq? @n "preg_replace")
|
||||
arguments: (arguments
|
||||
(argument
|
||||
(string) @pat (#match? @pat "/[^/]*/[a-zA-Z]*e"))))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "mysql_query_concat",
|
||||
description: "mysql_query with concatenated SQL",
|
||||
query: "(function_call_expression function: (name) @n (#eq? @n \"mysql_query\") arguments: (arguments (binary_expression) @concat)) @vuln",
|
||||
id: "php.code_exec.assert_string",
|
||||
description: "assert() with string argument — evaluates PHP code",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#eq? @n "assert")
|
||||
arguments: (arguments
|
||||
(argument (string) @code)))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "php.cmdi.system",
|
||||
description: "system/shell_exec/exec/passthru — shell command execution",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#match? @n "^(system|shell_exec|exec|passthru|proc_open|popen)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Deserialization ────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "php.deser.unserialize",
|
||||
description: "unserialize() — PHP object injection",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#eq? @n "unserialize"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier B: SQL injection (concatenation heuristic) ────────────────
|
||||
Pattern {
|
||||
id: "php.sqli.query_concat",
|
||||
description: "mysql_query/mysqli_query with concatenated SQL string",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#match? @n "^(mysql_query|mysqli_query)$")
|
||||
arguments: (arguments
|
||||
(argument (binary_expression) @concat)))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::SqlInjection,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier B: Path traversal (include with variable) ─────────────────
|
||||
Pattern {
|
||||
id: "php.path.include_variable",
|
||||
description: "include/require with variable path — file inclusion vulnerability",
|
||||
query: r#"(include_expression (variable_name)) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::PathTraversal,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Crypto ─────────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "php.crypto.md5",
|
||||
description: "md5() — weak hash function",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#eq? @n "md5"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "system_call",
|
||||
description: "system()/shell_exec()/exec() command execution",
|
||||
query: "(function_call_expression function: (name) @n (#match? @n \"system|shell_exec|exec|passthru\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
id: "php.crypto.sha1",
|
||||
description: "sha1() — weak hash function",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#eq? @n "sha1"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "php.crypto.rand",
|
||||
description: "rand()/mt_rand() — not cryptographically secure",
|
||||
query: r#"(function_call_expression
|
||||
function: (name) @n (#match? @n "^(rand|mt_rand)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,22 +1,178 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// Python AST patterns.
|
||||
///
|
||||
/// Taint rules cover `eval`/`exec`, `os.system`/`os.popen`/`subprocess.*`,
|
||||
/// and `cursor.execute`. AST patterns here add coverage for **deserialization**,
|
||||
/// **subprocess shell=True** (Tier B — taint doesn't check keyword args), and
|
||||
/// **code execution** sinks that taint cannot structurally verify.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Code execution ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "eval() on dynamic input",
|
||||
query: "(call function: (identifier) @id (#eq? @id \"eval\")) @vuln",
|
||||
id: "py.code_exec.eval",
|
||||
description: "eval() — dynamic code execution",
|
||||
query: r#"(call function: (identifier) @id (#eq? @id "eval")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "exec_call",
|
||||
description: "exec(...) execution of dynamic code",
|
||||
query: "(call function: (identifier) @id (#eq? @id \"exec\")) @vuln",
|
||||
id: "py.code_exec.exec",
|
||||
description: "exec() — dynamic code execution",
|
||||
query: r#"(call function: (identifier) @id (#eq? @id "exec")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "subprocess_shell_true",
|
||||
description: "subprocess.* with shell=True",
|
||||
query: "(call function: (attribute object: (identifier) @pkg (#eq? @pkg \"subprocess\")) arguments: (argument_list . (keyword_argument name: (identifier) @k (#eq? @k \"shell\")) (true) @val)) @vuln",
|
||||
id: "py.code_exec.compile",
|
||||
description: "compile() with exec/eval mode — code compilation from string",
|
||||
query: r#"(call function: (identifier) @id (#eq? @id "compile")) @vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "py.cmdi.os_system",
|
||||
description: "os.system() — shell command execution",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "os")
|
||||
attribute: (identifier) @fn (#eq? @fn "system")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "py.cmdi.os_popen",
|
||||
description: "os.popen() — shell command execution",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "os")
|
||||
attribute: (identifier) @fn (#eq? @fn "popen")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier B: subprocess with shell=True ─────────────────────────────
|
||||
Pattern {
|
||||
id: "py.cmdi.subprocess_shell",
|
||||
description: "subprocess call with shell=True",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "subprocess"))
|
||||
arguments: (argument_list
|
||||
(keyword_argument
|
||||
name: (identifier) @k (#eq? @k "shell")
|
||||
value: (true))))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Deserialization ────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "py.deser.pickle_loads",
|
||||
description: "pickle.loads/load — arbitrary object deserialization",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "pickle")
|
||||
attribute: (identifier) @fn (#match? @fn "^loads?$")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "py.deser.yaml_load",
|
||||
description: "yaml.load() without SafeLoader — arbitrary object instantiation",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "yaml")
|
||||
attribute: (identifier) @fn (#eq? @fn "load")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "py.deser.shelve_open",
|
||||
description: "shelve.open() — pickle-backed deserialization",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "shelve")
|
||||
attribute: (identifier) @fn (#eq? @fn "open")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier B: SQL injection (format/concat heuristic) ────────────────
|
||||
Pattern {
|
||||
id: "py.sqli.execute_format",
|
||||
description: "cursor.execute with string concatenation — SQL injection risk",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
attribute: (identifier) @fn (#eq? @fn "execute"))
|
||||
arguments: (argument_list
|
||||
(binary_operator) @arg))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::SqlInjection,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Weak crypto ────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "py.crypto.md5",
|
||||
description: "hashlib.md5() — weak hash algorithm",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "hashlib")
|
||||
attribute: (identifier) @fn (#eq? @fn "md5")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "py.crypto.sha1",
|
||||
description: "hashlib.sha1() — weak hash algorithm",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
object: (identifier) @pkg (#eq? @pkg "hashlib")
|
||||
attribute: (identifier) @fn (#eq? @fn "sha1")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Template injection ─────────────────────────────────────
|
||||
Pattern {
|
||||
id: "py.xss.jinja_from_string",
|
||||
description: "jinja2.Template from string — potential template injection",
|
||||
query: r#"(call
|
||||
function: (attribute
|
||||
attribute: (identifier) @fn (#eq? @fn "from_string")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,133 +1,141 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// Ruby AST patterns.
|
||||
///
|
||||
/// Taint rules cover `system`/`exec` (command injection), `eval` (code
|
||||
/// execution), and `puts`/`print` (output sinks). AST patterns here focus on
|
||||
/// **deserialization** (YAML.load, Marshal.load), **instance_eval/class_eval**,
|
||||
/// **backtick shell**, **send with dynamic arg**, and **constantize**.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ---------- Runtime code-execution primitives ----------
|
||||
// ── Tier A: Code execution ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "Kernel#eval usage",
|
||||
query: r#"
|
||||
(call
|
||||
(identifier) @id
|
||||
(#eq? @id "eval")
|
||||
) @vuln
|
||||
"#,
|
||||
id: "rb.code_exec.eval",
|
||||
description: "Kernel#eval — dynamic code execution",
|
||||
query: r#"(call (identifier) @id (#eq? @id "eval")) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "instance_eval_call",
|
||||
description: "Object#instance_eval usage",
|
||||
query: r#"
|
||||
(call
|
||||
(identifier) @id
|
||||
(#eq? @id "instance_eval")
|
||||
) @vuln
|
||||
"#,
|
||||
id: "rb.code_exec.instance_eval",
|
||||
description: "instance_eval — evaluates string in object context",
|
||||
query: r#"(call
|
||||
method: (identifier) @id (#eq? @id "instance_eval"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "class_eval_call",
|
||||
description: "Module#class_eval / module_eval usage",
|
||||
query: r#"
|
||||
(call
|
||||
(identifier) @id
|
||||
(#match? @id "^(class_eval|module_eval)$")
|
||||
) @vuln
|
||||
"#,
|
||||
id: "rb.code_exec.class_eval",
|
||||
description: "class_eval / module_eval — evaluates string in class context",
|
||||
query: r#"(call
|
||||
method: (identifier) @id (#match? @id "^(class_eval|module_eval)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ---------- Shell execution ----------
|
||||
// ── Tier A: Command execution ──────────────────────────────────────
|
||||
Pattern {
|
||||
id: "system_exec_interp",
|
||||
description: "system/exec with string interpolation",
|
||||
query: r#"
|
||||
(call
|
||||
method: (identifier) @m
|
||||
(#match? @m "^(system|exec)$")
|
||||
arguments: (argument_list
|
||||
(string
|
||||
(interpolation)+ @vuln
|
||||
)
|
||||
)
|
||||
)
|
||||
"#,
|
||||
id: "rb.cmdi.backtick",
|
||||
description: "Backtick shell execution",
|
||||
query: r#"(subshell) @vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Shell execution ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "rb.cmdi.system_interp",
|
||||
description: "system/exec call — command execution risk",
|
||||
query: r#"(call
|
||||
method: (identifier) @m (#match? @m "^(system|exec)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CommandExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Deserialization ────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "rb.deser.yaml_load",
|
||||
description: "YAML.load — arbitrary object deserialization (use safe_load instead)",
|
||||
query: r#"(call
|
||||
receiver: (constant) @recv (#match? @recv "^(YAML|Psych)$")
|
||||
method: (identifier) @m (#eq? @m "load"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "backtick_command",
|
||||
description: "Back-tick shell execution",
|
||||
// `uname -a`
|
||||
query: r#"(shell_command) @vuln"#,
|
||||
id: "rb.deser.marshal_load",
|
||||
description: "Marshal.load — arbitrary Ruby object deserialization",
|
||||
query: r#"(call
|
||||
receiver: (constant) @recv (#eq? @recv "Marshal")
|
||||
method: (identifier) @m (#eq? @m "load"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Deserialization,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ---------- Dangerous deserialisation ----------
|
||||
// ── Tier A: Reflection ─────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "yaml_load",
|
||||
description: "YAML.load / Psych.load (arbitrary object deserialisation)",
|
||||
query: r#"
|
||||
(call
|
||||
receiver: (constant) @recv
|
||||
(#match? @recv "^(YAML|Psych)$")
|
||||
method: (identifier) @m
|
||||
(#eq? @m "load")
|
||||
) @vuln
|
||||
"#,
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "marshal_load",
|
||||
description: "Marshal.load usage",
|
||||
query: r#"
|
||||
(call
|
||||
receiver: (constant) @recv
|
||||
(#eq? @recv "Marshal")
|
||||
method: (identifier) @m
|
||||
(#eq? @m "load")
|
||||
) @vuln
|
||||
"#,
|
||||
severity: Severity::High,
|
||||
},
|
||||
// ---------- Reflection / meta-programming ----------
|
||||
Pattern {
|
||||
id: "send_dynamic",
|
||||
description: "send() with dynamic first argument (not a literal symbol)",
|
||||
query: r#"
|
||||
(call
|
||||
method: (identifier) @m
|
||||
(#eq? @m "send")
|
||||
arguments: (argument_list
|
||||
[
|
||||
(identifier) ; send(method_name_var, …)
|
||||
(string (interpolation)+) ; send("user_#{role}", …)
|
||||
] @vuln
|
||||
)
|
||||
)
|
||||
id: "rb.reflection.send_dynamic",
|
||||
description: "send() with non-symbol argument — arbitrary method dispatch",
|
||||
query: r#"(call
|
||||
method: (identifier) @m (#eq? @m "send")
|
||||
arguments: (argument_list
|
||||
[(identifier) (string (interpolation)+)] @vuln))
|
||||
"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::Reflection,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "constantize_call",
|
||||
description: "ActiveSupport constantize / safe_constantize on tainted data",
|
||||
query: r#"
|
||||
(call
|
||||
method: (identifier) @m
|
||||
(#match? @m "^(constantize|safe_constantize)$")
|
||||
) @vuln
|
||||
"#,
|
||||
id: "rb.reflection.constantize",
|
||||
description: "constantize / safe_constantize — dynamic class resolution",
|
||||
query: r#"(call
|
||||
method: (identifier) @m (#match? @m "^(constantize|safe_constantize)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Reflection,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ---------- Insecure resource access ----------
|
||||
// ── Tier A: SSRF ───────────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "open_uri_http",
|
||||
description: "Kernel#open with HTTP(S) URL (open-uri auto-follow)",
|
||||
query: r#"
|
||||
(call
|
||||
method: (identifier) @m
|
||||
(#eq? @m "open")
|
||||
arguments: (argument_list
|
||||
(string) @url
|
||||
(#match? @url "^\"https?://")
|
||||
)
|
||||
) @vuln
|
||||
"#,
|
||||
id: "rb.ssrf.open_uri",
|
||||
description: "Kernel#open with HTTP URL — SSRF via open-uri",
|
||||
query: r#"(call
|
||||
method: (identifier) @m (#eq? @m "open")
|
||||
arguments: (argument_list
|
||||
(string) @url (#match? @url "^\"https?://")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::InsecureTransport,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Crypto ─────────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "rb.crypto.md5",
|
||||
description: "Digest::MD5 — weak hash algorithm",
|
||||
query: r#"(scope_resolution
|
||||
name: (constant) @c (#eq? @c "MD5"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,118 +1,170 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// Rust AST patterns.
|
||||
///
|
||||
/// Rust taint rules already cover `Command::new`/`arg`/`status`/`output` sinks
|
||||
/// and `env::var` / `fs::read_to_string` sources, so we do NOT duplicate those.
|
||||
/// Patterns here focus on **unsafe memory**, **panicking APIs**, and structural
|
||||
/// code-quality signals specific to Rust.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Memory Safety (unsafe) ─────────────────────────────────
|
||||
Pattern {
|
||||
id: "unsafe_block",
|
||||
description: "Use of an `unsafe` block",
|
||||
id: "rs.memory.transmute",
|
||||
description: "std::mem::transmute — unchecked type reinterpretation",
|
||||
query: r#"(call_expression
|
||||
function: (scoped_identifier
|
||||
path: (identifier) @p (#eq? @p "mem")
|
||||
name: (identifier) @f (#eq? @f "transmute")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "rs.memory.copy_nonoverlapping",
|
||||
description: "ptr::copy_nonoverlapping — raw pointer memcpy",
|
||||
query: r#"(call_expression
|
||||
function: (scoped_identifier
|
||||
path: (identifier) @p (#eq? @p "ptr")
|
||||
name: (identifier) @f (#eq? @f "copy_nonoverlapping")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "rs.memory.get_unchecked",
|
||||
description: "get_unchecked / get_unchecked_mut — unchecked indexing",
|
||||
query: r#"(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @m
|
||||
(#match? @m "^get_unchecked(_mut)?$")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "rs.memory.mem_zeroed",
|
||||
description: "std::mem::zeroed — zero-initialised memory may be UB for non-POD types",
|
||||
query: r#"(call_expression
|
||||
function: (scoped_identifier
|
||||
path: (identifier) @p (#eq? @p "mem")
|
||||
name: (identifier) @n (#eq? @n "zeroed")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "rs.memory.ptr_read",
|
||||
description: "ptr::read / ptr::read_volatile — raw pointer dereference",
|
||||
query: r#"(call_expression
|
||||
function: (scoped_identifier
|
||||
path: (identifier) @p (#eq? @p "ptr")
|
||||
name: (identifier) @n (#match? @n "^read(_volatile)?$")))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Code quality / robustness ──────────────────────────────
|
||||
Pattern {
|
||||
id: "rs.quality.unsafe_block",
|
||||
description: "unsafe block — manual memory safety obligation",
|
||||
query: "(unsafe_block) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "unsafe_fn",
|
||||
description: "`unsafe fn` declaration",
|
||||
query: "(function_item
|
||||
(function_modifiers) @mods
|
||||
(#match? @mods \"^unsafe\\b\")) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "transmute_call",
|
||||
description: "`std::mem::transmute` call",
|
||||
query: "(call_expression
|
||||
function: (scoped_identifier
|
||||
path: (identifier) @p (#eq? @p \"mem\")
|
||||
name: (identifier) @f (#eq? @f \"transmute\")))
|
||||
@vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "copy_nonoverlapping",
|
||||
description: "Raw pointer `copy_nonoverlapping`",
|
||||
query: "(call_expression
|
||||
function: (scoped_identifier
|
||||
path: (identifier) @p (#eq? @p \"ptr\")
|
||||
name: (identifier) @f (#eq? @f \"copy_nonoverlapping\")))
|
||||
@vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "get_unchecked",
|
||||
description: "`get_unchecked` / `get_unchecked_mut` slice access",
|
||||
query: "(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @m
|
||||
(#match? @m \"get_unchecked(_mut)?\"))) @vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "unwrap_call",
|
||||
description: "`.unwrap()` call (may panic)",
|
||||
query: "(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @name
|
||||
(#eq? @name \"unwrap\"))) ; exact match
|
||||
@vuln",
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "expect_call",
|
||||
description: "`.expect()` call (may panic)",
|
||||
query: "(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @name
|
||||
(#eq? @name \"expect\"))) @vuln",
|
||||
id: "rs.quality.unsafe_fn",
|
||||
description: "unsafe fn declaration",
|
||||
query: r#"(function_item
|
||||
(function_modifiers) @mods
|
||||
(#match? @mods "^unsafe"))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "panic_macro",
|
||||
description: "`panic!` macro invocation",
|
||||
query: "(macro_invocation (identifier) @id (#eq? @id \"panic\")) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "todo_or_unimplemented",
|
||||
description: "`todo!()` / `unimplemented!()` placeholder",
|
||||
query: "(macro_invocation
|
||||
(identifier) @id
|
||||
(#match? @id \"todo|unimplemented\")) @vuln",
|
||||
id: "rs.quality.unwrap",
|
||||
description: ".unwrap() — panics on None/Err",
|
||||
query: r#"(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @name (#eq? @name "unwrap")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeQuality,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "narrow_cast_with_as",
|
||||
description: "`as` cast to an 8-/16-bit integer (possible truncation)",
|
||||
query: "(type_cast_expression
|
||||
type: (primitive_type) @to
|
||||
(#match? @to \"^u?i(8|16)$\")) @vuln",
|
||||
id: "rs.quality.expect",
|
||||
description: ".expect() — panics on None/Err",
|
||||
query: r#"(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @name (#eq? @name "expect")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeQuality,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "mem_zeroed",
|
||||
description: "`std::mem::zeroed()`",
|
||||
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"zeroed\")))@vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "mem_forget",
|
||||
description: "`std::mem::forget()`",
|
||||
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"mem\") name:(identifier)@n (#eq? @n \"forget\")))@vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "ptr_read",
|
||||
description: "`ptr::read_*` raw-ptr read",
|
||||
query: "(call_expression function:(scoped_identifier path:(identifier)@p (#eq? @p \"ptr\") name:(identifier)@n (#match? @n \"read(_volatile)?\")))@vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "arc_unwrap",
|
||||
description: "`Arc::unwrap_or_else_unchecked`",
|
||||
query: "(call_expression function:(scoped_identifier name:(identifier)@n (#eq? @n \"unwrap_or_else_unchecked\")))@vuln",
|
||||
severity: Severity::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "dbg_macro",
|
||||
description: "`dbg!()` left in code",
|
||||
query: "(macro_invocation (identifier)@id (#eq? @id \"dbg\"))@vuln",
|
||||
id: "rs.quality.panic_macro",
|
||||
description: "panic! macro invocation",
|
||||
query: r#"(macro_invocation (identifier) @id (#eq? @id "panic")) @vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeQuality,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "rs.quality.todo",
|
||||
description: "todo!() / unimplemented!() placeholder left in code",
|
||||
query: r#"(macro_invocation
|
||||
(identifier) @id
|
||||
(#match? @id "^(todo|unimplemented)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeQuality,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Narrowing cast ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "rs.memory.narrow_cast",
|
||||
description: "`as` cast to 8/16-bit integer — possible truncation",
|
||||
query: r#"(type_cast_expression
|
||||
type: (primitive_type) @to
|
||||
(#match? @to "^(u8|i8|u16|i16)$"))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "rs.memory.mem_forget",
|
||||
description: "std::mem::forget — may leak resources",
|
||||
query: r#"(call_expression
|
||||
function: (scoped_identifier
|
||||
path: (identifier) @p (#eq? @p "mem")
|
||||
name: (identifier) @n (#eq? @n "forget")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::MemorySafety,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,100 +1,157 @@
|
|||
use crate::patterns::{Pattern, Severity};
|
||||
use crate::evidence::Confidence;
|
||||
use crate::patterns::{Pattern, PatternCategory, PatternTier, Severity};
|
||||
|
||||
/// TypeScript AST patterns.
|
||||
///
|
||||
/// TypeScript shares most patterns with JavaScript. Taint rules cover `eval`,
|
||||
/// `innerHTML`, and `child_process.*` sinks. AST patterns here mirror JS
|
||||
/// patterns plus TS-specific `any` type-safety escapes.
|
||||
pub const PATTERNS: &[Pattern] = &[
|
||||
// ── Tier A: Code execution ─────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "eval_call",
|
||||
description: "Use of eval()",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\")) @vuln",
|
||||
id: "ts.code_exec.eval",
|
||||
description: "eval() — dynamic code execution",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#eq? @id "eval"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "new_function",
|
||||
description: "new Function() constructor",
|
||||
query: "(new_expression constructor: (identifier) @id (#eq? @id \"Function\")) @vuln",
|
||||
id: "ts.code_exec.new_function",
|
||||
description: "new Function() constructor — eval equivalent",
|
||||
query: r#"(new_expression
|
||||
constructor: (identifier) @id (#eq? @id "Function"))
|
||||
@vuln"#,
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "document_write",
|
||||
description: "document.write() call",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"write\"))) @vuln",
|
||||
id: "ts.code_exec.settimeout_string",
|
||||
description: "setTimeout/setInterval with string argument — implicit eval",
|
||||
query: r#"(call_expression
|
||||
function: (identifier) @id (#match? @id "^(setTimeout|setInterval)$")
|
||||
arguments: (arguments (string) @code))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeExec,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: XSS sinks ──────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "settimeout_string",
|
||||
description: "setTimeout / setInterval with a string argument",
|
||||
query: "(call_expression function: (identifier) @id (#match? @id \"setTimeout|setInterval\") arguments: (arguments (string) @code . _)) @vuln",
|
||||
id: "ts.xss.document_write",
|
||||
description: "document.write() — XSS sink",
|
||||
query: r#"(call_expression
|
||||
function: (member_expression
|
||||
object: (identifier) @obj (#eq? @obj "document")
|
||||
property: (property_identifier) @prop (#match? @prop "^(write|writeln)$")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "any_type",
|
||||
description: "Type annotation of `any`",
|
||||
query: "(type_annotation (predefined_type) @t (#eq? @t \"any\")) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "json_parse",
|
||||
description: "JSON.parse on dynamic string",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"JSON\") property: (property_identifier) @prop (#eq? @prop \"parse\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "as_any_assertion",
|
||||
description: "Type assertion to `any` using `as any`",
|
||||
query: "(as_expression type: (predefined_type) @t (#eq? @t \"any\")) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "type_assertion_any",
|
||||
description: "Type assertion to `any` using `<any>` syntax",
|
||||
query: "(type_assertion type: (predefined_type) @t (#eq? @t \"any\")) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "outer_html_assignment",
|
||||
description: "Assignment to element.outerHTML",
|
||||
query: "(assignment_expression left: (member_expression property: (property_identifier) @prop (#eq? @prop \"outerHTML\"))) @vuln",
|
||||
id: "ts.xss.outer_html",
|
||||
description: "Assignment to .outerHTML — XSS sink",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
property: (property_identifier) @prop (#eq? @prop "outerHTML")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
Pattern {
|
||||
id: "insert_adjacent_html",
|
||||
description: "insertAdjacentHTML() call",
|
||||
query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"insertAdjacentHTML\"))) @vuln",
|
||||
id: "ts.xss.insert_adjacent_html",
|
||||
description: "insertAdjacentHTML() — XSS sink",
|
||||
query: r#"(call_expression
|
||||
function: (member_expression
|
||||
property: (property_identifier) @prop (#eq? @prop "insertAdjacentHTML")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Weak crypto ────────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "ts.crypto.math_random",
|
||||
description: "Math.random() — not cryptographically secure",
|
||||
query: r#"(call_expression
|
||||
function: (member_expression
|
||||
object: (identifier) @obj (#eq? @obj "Math")
|
||||
property: (property_identifier) @prop (#eq? @prop "random")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Crypto,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: TypeScript-specific type-safety escapes ────────────────
|
||||
Pattern {
|
||||
id: "ts.quality.any_annotation",
|
||||
description: "Type annotation of `any` — disables type checking",
|
||||
query: r#"(type_annotation (predefined_type) @t (#eq? @t "any")) @vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeQuality,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "document_cookie_write",
|
||||
id: "ts.quality.as_any",
|
||||
description: "Type assertion `as any` — type-safety escape hatch",
|
||||
query: r#"(as_expression (predefined_type) @t (#eq? @t "any")) @vuln"#,
|
||||
severity: Severity::Low,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::CodeQuality,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
// ── Tier A: Prototype pollution ────────────────────────────────────
|
||||
Pattern {
|
||||
id: "ts.prototype.proto_assignment",
|
||||
description: "Assignment to __proto__ — prototype pollution",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
property: (property_identifier) @prop (#eq? @prop "__proto__")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Prototype,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Open redirect ──────────────────────────────────────────
|
||||
Pattern {
|
||||
id: "ts.xss.location_assign",
|
||||
description: "Assignment to location/location.href — open redirect",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier) @obj (#match? @obj "^(window|location|document)$")
|
||||
property: (property_identifier) @prop (#match? @prop "^(location|href)$")))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Cookie manipulation ────────────────────────────────────
|
||||
Pattern {
|
||||
id: "ts.xss.cookie_write",
|
||||
description: "Write to document.cookie",
|
||||
query: "(assignment_expression left: (member_expression object: (identifier) @obj (#eq? @obj \"document\") property: (property_identifier) @prop (#eq? @prop \"cookie\"))) @vuln",
|
||||
query: r#"(assignment_expression
|
||||
left: (member_expression
|
||||
object: (identifier) @obj (#eq? @obj "document")
|
||||
property: (property_identifier) @prop (#eq? @prop "cookie")))
|
||||
@vuln"#,
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "onclick_setattribute",
|
||||
description: "Element.setAttribute('onclick', …)",
|
||||
query: "(call_expression function: (member_expression property: (property_identifier) @prop (#eq? @prop \"setAttribute\")) arguments: (arguments (string) @name (#eq? @name \"\\\"onclick\\\"\") . (string) @handler)) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "math_random_call",
|
||||
description: "Use of Math.random() for security-sensitive randomness",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"Math\") property: (property_identifier) @prop (#eq? @prop \"random\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "crypto_createhash_md5",
|
||||
description: "Insecure hash algorithm: crypto.createHash('md5')",
|
||||
query: "(call_expression function: (member_expression object: (identifier) @obj (#eq? @obj \"crypto\") property: (property_identifier) @prop (#eq? @prop \"createHash\")) arguments: (arguments (string) @alg (#match? @alg \"(?i)\\\"md5\\\"\"))) @vuln",
|
||||
severity: Severity::Medium,
|
||||
},
|
||||
Pattern {
|
||||
id: "fetch_http_url",
|
||||
description: "fetch() over plain HTTP",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"fetch\") arguments: (arguments (string) @url (#match? @url \"^\\\"http://\"))) @vuln",
|
||||
severity: Severity::Low,
|
||||
},
|
||||
Pattern {
|
||||
id: "xhr_eval_response",
|
||||
description: "eval() of XMLHttpRequest.responseText",
|
||||
query: "(call_expression function: (identifier) @id (#eq? @id \"eval\") arguments: (arguments (member_expression property: (property_identifier) @prop (#eq? @prop \"responseText\")))) @vuln",
|
||||
severity: Severity::High,
|
||||
tier: PatternTier::A,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
646
src/rank.rs
Normal file
646
src/rank.rs
Normal file
|
|
@ -0,0 +1,646 @@
|
|||
//! Attack surface ranking for scan diagnostics.
|
||||
//!
|
||||
//! Computes a deterministic score for each [`Diag`] using only in-memory
|
||||
//! information (severity, evidence, source kind, rule ID, validation state).
|
||||
//! The score is used to sort findings so that truncation keeps the most
|
||||
//! exploitable / important results.
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::evidence::Evidence;
|
||||
use crate::patterns::Severity;
|
||||
use std::hash::{DefaultHasher, Hash, Hasher};
|
||||
|
||||
/// Result of scoring one diagnostic for attack-surface ranking.
///
/// Produced by `compute_attack_rank`: `score` is the sum of every applied
/// component, and `components` records how that sum was reached.
#[derive(Debug, Clone)]
pub struct AttackRank {
    /// Total score; a higher value sorts the finding earlier.
    pub score: f64,
    /// `(component name, contribution)` pairs in the order they were applied.
    /// Kept for debug/display output only.
    // Not read by the ranking code itself, hence the lint allowance.
    #[allow(dead_code)]
    pub components: Vec<(String, String)>,
}
|
||||
|
||||
/// Compute an attack-surface score for `diag`.
|
||||
///
|
||||
/// The score is a positive `f64`; higher means more exploitable / important.
|
||||
/// Components are returned for optional debug/display.
|
||||
pub fn compute_attack_rank(diag: &Diag) -> AttackRank {
|
||||
let mut score = 0.0_f64;
|
||||
let mut components: Vec<(String, String)> = Vec::new();
|
||||
|
||||
// ── 1. Severity base ────────────────────────────────────────────────
|
||||
let sev_score = match diag.severity {
|
||||
Severity::High => 60.0,
|
||||
Severity::Medium => 30.0,
|
||||
Severity::Low => 10.0,
|
||||
};
|
||||
score += sev_score;
|
||||
components.push(("severity".into(), format!("{sev_score}")));
|
||||
|
||||
// ── 2. Analysis kind bonus ──────────────────────────────────────────
|
||||
//
|
||||
// Taint-confirmed findings are the strongest signal. State findings
|
||||
// (resource lifecycle / auth) are next. CFG-structural findings
|
||||
// without taint evidence rank lower. AST-only pattern matches are
|
||||
// the weakest.
|
||||
let kind_bonus = analysis_kind_bonus(&diag.id, diag.evidence.as_ref());
|
||||
score += kind_bonus;
|
||||
if kind_bonus != 0.0 {
|
||||
components.push(("analysis_kind".into(), format!("{kind_bonus}")));
|
||||
}
|
||||
|
||||
// ── 3. Evidence strength / source-kind priority ─────────────────────
|
||||
let evidence_bonus = evidence_strength(diag);
|
||||
score += evidence_bonus;
|
||||
if evidence_bonus != 0.0 {
|
||||
components.push(("evidence".into(), format!("{evidence_bonus}")));
|
||||
}
|
||||
|
||||
// ── 4. State finding sub-ranking ────────────────────────────────────
|
||||
let state_bonus = state_finding_bonus(&diag.id);
|
||||
score += state_bonus;
|
||||
if state_bonus != 0.0 {
|
||||
components.push(("state_rule".into(), format!("{state_bonus}")));
|
||||
}
|
||||
|
||||
// ── 5. Path validation penalty ──────────────────────────────────────
|
||||
//
|
||||
// If a taint path is guarded by a validation predicate, the finding
|
||||
// has higher informational value but lower exploitability because the
|
||||
// guard may prevent the vulnerability from being triggered. Apply a
|
||||
// small penalty (–5) to push validated paths below otherwise-equal
|
||||
// unvalidated ones without changing the overall ranking tier.
|
||||
let path_validated = diag.evidence.as_ref().map_or(diag.path_validated, |ev| {
|
||||
ev.notes.iter().any(|n| n == "path_validated")
|
||||
});
|
||||
if path_validated {
|
||||
score -= 5.0;
|
||||
components.push(("path_validated_penalty".into(), "-5".into()));
|
||||
}
|
||||
|
||||
AttackRank { score, components }
|
||||
}
|
||||
|
||||
/// Deterministic sort key for a diagnostic.
|
||||
///
|
||||
/// Two diags with identical scores are tie-broken by:
|
||||
/// severity (High < Medium < Low in the `Ord` impl, so we negate)
|
||||
/// → rule ID → file path → line → col → message hash
|
||||
///
|
||||
/// Returns a tuple suitable for `sort_by`.
|
||||
pub fn sort_key(diag: &Diag) -> impl Ord {
|
||||
let sev_ord: u8 = match diag.severity {
|
||||
Severity::High => 0,
|
||||
Severity::Medium => 1,
|
||||
Severity::Low => 2,
|
||||
};
|
||||
let msg_hash = {
|
||||
let mut h = DefaultHasher::new();
|
||||
diag.message.hash(&mut h);
|
||||
h.finish()
|
||||
};
|
||||
(
|
||||
sev_ord,
|
||||
diag.id.clone(),
|
||||
diag.path.clone(),
|
||||
diag.line,
|
||||
diag.col,
|
||||
msg_hash,
|
||||
)
|
||||
}
|
||||
|
||||
/// Sort diagnostics in-place by descending attack-surface score, then by
|
||||
/// deterministic tie-breaker. Populates `rank_score` on each `Diag`.
|
||||
pub fn rank_diags(diags: &mut [Diag]) {
|
||||
// Compute scores
|
||||
let scores: Vec<f64> = diags.iter().map(|d| compute_attack_rank(d).score).collect();
|
||||
|
||||
// Attach scores to diags
|
||||
for (d, s) in diags.iter_mut().zip(scores.iter()) {
|
||||
d.rank_score = Some(*s);
|
||||
}
|
||||
|
||||
// Sort descending by score, then ascending by tie-breaker
|
||||
diags.sort_by(|a, b| {
|
||||
let sa = a.rank_score.unwrap_or(0.0);
|
||||
let sb = b.rank_score.unwrap_or(0.0);
|
||||
// Descending score (higher first)
|
||||
sb.partial_cmp(&sa)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| sort_key(a).cmp(&sort_key(b)))
|
||||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Scoring helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Bonus based on analysis kind inferred from rule ID + evidence.
|
||||
fn analysis_kind_bonus(rule_id: &str, evidence: Option<&Evidence>) -> f64 {
|
||||
if rule_id.starts_with("taint-") {
|
||||
// Taint-confirmed flow is the strongest signal
|
||||
10.0
|
||||
} else if rule_id.starts_with("state-") {
|
||||
// State-model findings (resource / auth) are strong
|
||||
8.0
|
||||
} else if rule_id.starts_with("cfg-") {
|
||||
// CFG-structural findings: boost if evidence exists
|
||||
if evidence.is_some_and(|e| !e.is_empty()) {
|
||||
5.0
|
||||
} else {
|
||||
3.0
|
||||
}
|
||||
} else {
|
||||
// AST-only pattern match
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Bonus from evidence strength: number of evidence items and source-kind
|
||||
/// priority.
|
||||
fn evidence_strength(diag: &Diag) -> f64 {
|
||||
let mut bonus = 0.0;
|
||||
|
||||
if let Some(ev) = &diag.evidence {
|
||||
// Count structured evidence items (capped at 4)
|
||||
let item_count = ev.source.is_some() as usize
|
||||
+ ev.sink.is_some() as usize
|
||||
+ (ev.guards.len() + ev.sanitizers.len()).min(2);
|
||||
bonus += item_count.min(4) as f64;
|
||||
|
||||
// Source-kind priority from evidence notes
|
||||
for note in &ev.notes {
|
||||
if let Some(kind) = note.strip_prefix("source_kind:") {
|
||||
bonus += source_kind_priority(kind);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Fallback for DB-cached diags without structured evidence
|
||||
bonus += (diag.labels.len() as f64).min(4.0);
|
||||
for (label, value) in &diag.labels {
|
||||
if label == "Source" {
|
||||
bonus += source_kind_priority(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bonus
|
||||
}
|
||||
|
||||
/// Priority bonus for the kind of taint source named by `source_value`.
///
/// `source_value` is either an exact source-kind name (`"UserInput"`,
/// `"EnvironmentConfig"`, `"FileSystem"`, `"Database"`, `"Unknown"`) or a
/// free-form legacy label such as `read_line() at 1:1`.
///
/// Exact names map directly. Any other value is lower-cased and classified by
/// substring, in this order of precedence:
///
/// | tier             | bonus | substrings                                                                              |
/// |------------------|-------|-----------------------------------------------------------------------------------------|
/// | user input       | 6     | `stdin`, `argv`, `request`, `form`, `query`, `param`, `header`, `body`, `read_line`     |
/// | environment      | 5     | `env`, `var(`                                                                           |
/// | file system      | 3     | `read`, `file`, `open`                                                                  |
/// | database         | 2     | `fetch`, `select`                                                                       |
/// | anything else    | 4     | -                                                                                       |
///
/// Because tiers are tried top to bottom, `query` always resolves to the
/// user-input tier (a label like `db.query(..)` scores 6, not 2) and
/// `read_line` wins over the file-system hint `read`.
fn source_kind_priority(source_value: &str) -> f64 {
    const USER_INPUT: f64 = 6.0;
    const ENVIRONMENT: f64 = 5.0;
    const UNKNOWN: f64 = 4.0;
    const FILE_SYSTEM: f64 = 3.0;
    const DATABASE: f64 = 2.0;

    // Exact source-kind names (as carried by "source_kind:X" notes).
    match source_value {
        "UserInput" => return USER_INPUT,
        "EnvironmentConfig" => return ENVIRONMENT,
        "FileSystem" => return FILE_SYSTEM,
        "Database" => return DATABASE,
        "Unknown" => return UNKNOWN,
        _ => {}
    }

    const USER_INPUT_HINTS: [&str; 9] = [
        "stdin",
        "argv",
        "request",
        "form",
        "query",
        "param",
        "header",
        "body",
        "read_line",
    ];
    // "getenv" needs no entry of its own: it always contains "env".
    const ENVIRONMENT_HINTS: [&str; 2] = ["env", "var("];
    const FILE_SYSTEM_HINTS: [&str; 3] = ["read", "file", "open"];
    // "query" is deliberately absent: the user-input tier claims it first,
    // so a check here could never fire.
    const DATABASE_HINTS: [&str; 2] = ["fetch", "select"];

    // Fallback: substring matching for legacy labels.
    let lower = source_value.to_ascii_lowercase();
    let mentions = |hints: &[&str]| hints.iter().any(|hint| lower.contains(*hint));

    if mentions(&USER_INPUT_HINTS) {
        USER_INPUT
    } else if mentions(&ENVIRONMENT_HINTS) {
        ENVIRONMENT
    } else if mentions(&FILE_SYSTEM_HINTS) {
        FILE_SYSTEM
    } else if mentions(&DATABASE_HINTS) {
        DATABASE
    } else {
        // Unrecognised labels are treated as moderately exploitable.
        UNKNOWN
    }
}
|
||||
|
||||
/// Extra score for individual state-analysis rules, matched by exact rule ID.
///
/// Rule IDs not listed in the table contribute nothing.
fn state_finding_bonus(rule_id: &str) -> f64 {
    const RULE_BONUSES: [(&str, f64); 5] = [
        ("state-use-after-close", 6.0),
        ("state-unauthed-access", 6.0),
        ("state-double-close", 3.0),
        ("state-resource-leak", 2.0),          // must-leak
        ("state-resource-leak-possible", 1.0), // may-leak
    ];

    RULE_BONUSES
        .iter()
        .find(|(id, _)| *id == rule_id)
        .map_or(0.0, |&(_, bonus)| bonus)
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn make_diag(
|
||||
severity: Severity,
|
||||
id: &str,
|
||||
path: &str,
|
||||
line: usize,
|
||||
labels: Vec<(String, String)>,
|
||||
path_validated: bool,
|
||||
) -> Diag {
|
||||
Diag {
|
||||
path: path.into(),
|
||||
line,
|
||||
col: 1,
|
||||
severity,
|
||||
id: id.into(),
|
||||
category: crate::patterns::FindingCategory::Security,
|
||||
path_validated,
|
||||
guard_kind: None,
|
||||
message: None,
|
||||
labels,
|
||||
confidence: None,
|
||||
evidence: None,
|
||||
rank_score: None,
|
||||
rank_reason: None,
|
||||
suppressed: false,
|
||||
suppression: None,
|
||||
rollup: None,
|
||||
}
|
||||
}
|
||||
|
||||
// ── Ordering tests ──────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn high_taint_user_input_ranks_above_medium_file_io() {
|
||||
let high_taint = make_diag(
|
||||
Severity::High,
|
||||
"taint-unsanitised-flow (source 1:1)",
|
||||
"src/main.rs",
|
||||
10,
|
||||
vec![
|
||||
("Source".into(), "read_line() at 1:1".into()),
|
||||
("Sink".into(), "exec()".into()),
|
||||
],
|
||||
false,
|
||||
);
|
||||
let med_file = make_diag(
|
||||
Severity::Medium,
|
||||
"taint-unsanitised-flow (source 5:1)",
|
||||
"src/lib.rs",
|
||||
20,
|
||||
vec![
|
||||
("Source".into(), "File::open() at 5:1".into()),
|
||||
("Sink".into(), "write()".into()),
|
||||
],
|
||||
false,
|
||||
);
|
||||
|
||||
let score_high = compute_attack_rank(&high_taint).score;
|
||||
let score_med = compute_attack_rank(&med_file).score;
|
||||
assert!(
|
||||
score_high > score_med,
|
||||
"high taint user-input ({score_high}) should rank above medium file-io ({score_med})"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn must_leak_ranks_above_may_leak() {
|
||||
let must = make_diag(
|
||||
Severity::Medium,
|
||||
"state-resource-leak",
|
||||
"src/db.rs",
|
||||
30,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
let may = make_diag(
|
||||
Severity::Low,
|
||||
"state-resource-leak-possible",
|
||||
"src/db.rs",
|
||||
35,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
|
||||
let score_must = compute_attack_rank(&must).score;
|
||||
let score_may = compute_attack_rank(&may).score;
|
||||
assert!(
|
||||
score_must > score_may,
|
||||
"must-leak ({score_must}) should rank above may-leak ({score_may})"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cfg_without_evidence_ranks_below_taint_confirmed() {
|
||||
let taint = make_diag(
|
||||
Severity::High,
|
||||
"taint-unsanitised-flow (source 1:1)",
|
||||
"src/main.rs",
|
||||
10,
|
||||
vec![
|
||||
("Source".into(), "env::var(\"CMD\") at 1:1".into()),
|
||||
("Sink".into(), "exec()".into()),
|
||||
],
|
||||
false,
|
||||
);
|
||||
let cfg_only = make_diag(
|
||||
Severity::High,
|
||||
"cfg-unguarded-sink",
|
||||
"src/main.rs",
|
||||
10,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
|
||||
let score_taint = compute_attack_rank(&taint).score;
|
||||
let score_cfg = compute_attack_rank(&cfg_only).score;
|
||||
assert!(
|
||||
score_taint > score_cfg,
|
||||
"taint-confirmed ({score_taint}) should rank above cfg-only ({score_cfg})"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn determinism_input_order_independent() {
|
||||
let d1 = make_diag(
|
||||
Severity::High,
|
||||
"taint-unsanitised-flow (source 1:1)",
|
||||
"a.rs",
|
||||
1,
|
||||
vec![("Source".into(), "stdin at 1:1".into())],
|
||||
false,
|
||||
);
|
||||
let d2 = make_diag(
|
||||
Severity::Medium,
|
||||
"cfg-unguarded-sink",
|
||||
"b.rs",
|
||||
2,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
let d3 = make_diag(Severity::Low, "rs.code_exec.eval", "c.rs", 3, vec![], false);
|
||||
|
||||
let mut order_a = vec![d1.clone(), d2.clone(), d3.clone()];
|
||||
let mut order_b = vec![d3, d1, d2];
|
||||
|
||||
rank_diags(&mut order_a);
|
||||
rank_diags(&mut order_b);
|
||||
|
||||
let ids_a: Vec<_> = order_a.iter().map(|d| (&d.id, d.line)).collect();
|
||||
let ids_b: Vec<_> = order_b.iter().map(|d| (&d.id, d.line)).collect();
|
||||
assert_eq!(
|
||||
ids_a, ids_b,
|
||||
"ranking must be deterministic regardless of input order"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_validated_penalty_applied() {
|
||||
let unvalidated = make_diag(
|
||||
Severity::High,
|
||||
"taint-unsanitised-flow (source 1:1)",
|
||||
"src/main.rs",
|
||||
10,
|
||||
vec![("Source".into(), "env::var(\"X\") at 1:1".into())],
|
||||
false,
|
||||
);
|
||||
let validated = make_diag(
|
||||
Severity::High,
|
||||
"taint-unsanitised-flow (source 1:1)",
|
||||
"src/main.rs",
|
||||
10,
|
||||
vec![("Source".into(), "env::var(\"X\") at 1:1".into())],
|
||||
true,
|
||||
);
|
||||
|
||||
let score_unval = compute_attack_rank(&unvalidated).score;
|
||||
let score_val = compute_attack_rank(&validated).score;
|
||||
assert!(
|
||||
score_unval > score_val,
|
||||
"unvalidated ({score_unval}) should rank above validated ({score_val})"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn state_use_after_close_ranks_above_may_leak() {
|
||||
let uac = make_diag(
|
||||
Severity::High,
|
||||
"state-use-after-close",
|
||||
"x.rs",
|
||||
1,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
let may = make_diag(
|
||||
Severity::Low,
|
||||
"state-resource-leak-possible",
|
||||
"x.rs",
|
||||
2,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
|
||||
let score_uac = compute_attack_rank(&uac).score;
|
||||
let score_may = compute_attack_rank(&may).score;
|
||||
assert!(score_uac > score_may);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unauthed_access_ranks_above_resource_leak() {
|
||||
let unauth = make_diag(
|
||||
Severity::High,
|
||||
"state-unauthed-access",
|
||||
"x.rs",
|
||||
1,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
let leak = make_diag(
|
||||
Severity::Medium,
|
||||
"state-resource-leak",
|
||||
"x.rs",
|
||||
2,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
|
||||
let score_ua = compute_attack_rank(&unauth).score;
|
||||
let score_lk = compute_attack_rank(&leak).score;
|
||||
assert!(score_ua > score_lk);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ast_only_ranks_below_all_others_at_same_severity() {
|
||||
let ast = make_diag(
|
||||
Severity::High,
|
||||
"rs.code_exec.eval",
|
||||
"x.rs",
|
||||
1,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
let cfg = make_diag(
|
||||
Severity::High,
|
||||
"cfg-unguarded-sink",
|
||||
"x.rs",
|
||||
2,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
let taint = make_diag(
|
||||
Severity::High,
|
||||
"taint-unsanitised-flow (source 1:1)",
|
||||
"x.rs",
|
||||
3,
|
||||
vec![("Source".into(), "env::var(\"X\") at 1:1".into())],
|
||||
false,
|
||||
);
|
||||
let state = make_diag(
|
||||
Severity::High,
|
||||
"state-use-after-close",
|
||||
"x.rs",
|
||||
4,
|
||||
vec![],
|
||||
false,
|
||||
);
|
||||
|
||||
let s_ast = compute_attack_rank(&ast).score;
|
||||
let s_cfg = compute_attack_rank(&cfg).score;
|
||||
let s_taint = compute_attack_rank(&taint).score;
|
||||
let s_state = compute_attack_rank(&state).score;
|
||||
|
||||
assert!(s_ast < s_cfg, "AST ({s_ast}) < CFG ({s_cfg})");
|
||||
assert!(s_ast < s_taint, "AST ({s_ast}) < taint ({s_taint})");
|
||||
assert!(s_ast < s_state, "AST ({s_ast}) < state ({s_state})");
|
||||
}
|
||||
|
||||
#[test]
fn structured_evidence_source_kind_matches_legacy() {
    use crate::evidence::{Evidence, SpanEvidence};

    // A diagnostic whose structured evidence carries a `source_kind:UserInput`
    // note must be scored exactly like one that only has the legacy "Source"
    // label describing user input.
    let span_at = |line, col, kind: &str, snippet: &str| SpanEvidence {
        path: "src/main.rs".into(),
        line,
        col,
        kind: kind.into(),
        snippet: Some(snippet.into()),
    };

    let structured = {
        let mut diag = make_diag(
            Severity::High,
            "taint-unsanitised-flow (source 1:1)",
            "src/main.rs",
            10,
            vec![],
            false,
        );
        diag.evidence = Some(Evidence {
            source: Some(span_at(1, 1, "source", "read_line()")),
            sink: Some(span_at(10, 5, "sink", "exec()")),
            guards: vec![],
            sanitizers: vec![],
            state: None,
            notes: vec!["source_kind:UserInput".into()],
        });
        diag
    };

    // Same finding, described only through legacy label/value pairs.
    let legacy_labels = vec![
        ("Source".into(), "read_line() at 1:1".into()),
        ("Sink".into(), "exec()".into()),
    ];
    let legacy = make_diag(
        Severity::High,
        "taint-unsanitised-flow (source 1:1)",
        "src/main.rs",
        10,
        legacy_labels,
        false,
    );

    let score_structured = compute_attack_rank(&structured).score;
    let score_legacy = compute_attack_rank(&legacy).score;
    assert_eq!(
        score_structured, score_legacy,
        "structured ({score_structured}) should equal legacy ({score_legacy})"
    );
}
|
||||
|
||||
#[test]
fn evidence_item_count_capped_at_4() {
    use crate::evidence::{Evidence, SpanEvidence};

    // Every span in this test is the same placeholder guard span.
    fn placeholder_span() -> SpanEvidence {
        SpanEvidence {
            path: "x.rs".into(),
            line: 1,
            col: 1,
            kind: "guard".into(),
            snippet: None,
        }
    }

    let mut diag = make_diag(
        Severity::High,
        "taint-unsanitised-flow (source 1:1)",
        "src/main.rs",
        10,
        vec![],
        false,
    );
    diag.evidence = Some(Evidence {
        source: Some(placeholder_span()),
        sink: Some(placeholder_span()),
        // 3 guards
        guards: vec![placeholder_span(), placeholder_span(), placeholder_span()],
        // 1 sanitizer
        sanitizers: vec![placeholder_span()],
        state: None,
        notes: vec![],
    });

    // item_count = 1 (source) + 1 (sink) + min(2, 3 guards + 1 sanitizer) = 4,
    // so the item bonus is 4.0. No `source_kind` note is attached, so no
    // source-priority bonus is added on top: the expected total is exactly 4.0.
    let strength = evidence_strength(&diag);
    let deviation = (strength - 4.0).abs();
    assert!(
        deviation < f64::EPSILON,
        "evidence item count should be capped at 4, got {strength}"
    );
}
|
||||
|
||||
#[test]
fn path_validated_from_evidence_notes() {
    use crate::evidence::Evidence;

    // The last `make_diag` argument (path_validated on the Diag) stays false;
    // the only validation signal is the note inside the structured evidence.
    let mut diag = make_diag(
        Severity::High,
        "taint-unsanitised-flow (source 1:1)",
        "src/main.rs",
        10,
        vec![],
        false,
    );
    diag.evidence = Some(Evidence {
        source: None,
        sink: None,
        guards: vec![],
        sanitizers: vec![],
        state: None,
        notes: vec!["path_validated".into()],
    });

    let rank = compute_attack_rank(&diag);
    let penalty_applied = rank
        .components
        .iter()
        .any(|(k, _)| k == "path_validated_penalty");
    assert!(
        penalty_applied,
        "path_validated note in evidence should trigger penalty"
    );
}
|
||||
}
|
||||
313
src/state/domain.rs
Normal file
313
src/state/domain.rs
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
use super::lattice::Lattice;
|
||||
use super::symbol::SymbolId;
|
||||
use bitflags::bitflags;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
// ── ResourceLifecycle ────────────────────────────────────────────────────
|
||||
|
||||
bitflags! {
    /// Set of lifecycle states a single resource handle may be in.
    ///
    /// Each flag is one possible state; several flags set at once mean the
    /// handle is in different states on different paths. The lattice join is
    /// therefore a bitwise OR.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
    pub struct ResourceLifecycle: u8 {
        /// Bit 0. Presumably "not yet initialised" — TODO confirm against the transfer function.
        const UNINIT = 1 << 0;
        /// Bit 1. The handle is open on at least one path.
        const OPEN = 1 << 1;
        /// Bit 2. The handle has been closed on at least one path.
        const CLOSED = 1 << 2;
        /// Bit 3. Presumably "ownership moved elsewhere" — TODO confirm against the transfer function.
        const MOVED = 1 << 3;
    }
}
|
||||
|
||||
/// Powerset lattice over the lifecycle flags: bottom is the empty set, join is
/// set union and the order is set inclusion.
impl Lattice for ResourceLifecycle {
    /// The empty flag set (no state known).
    fn bot() -> Self {
        Self::empty()
    }

    /// Union of both flag sets.
    fn join(&self, other: &Self) -> Self {
        self.union(*other)
    }

    /// `true` when every flag set in `self` is also set in `other`.
    fn leq(&self, other: &Self) -> bool {
        other.contains(*self)
    }
}
|
||||
|
||||
// ── ResourceDomainState ──────────────────────────────────────────────────
|
||||
|
||||
/// Maps interned variable IDs to their lifecycle bitsets.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct ResourceDomainState {
|
||||
pub vars: HashMap<SymbolId, ResourceLifecycle>,
|
||||
}
|
||||
|
||||
impl ResourceDomainState {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn get(&self, sym: SymbolId) -> ResourceLifecycle {
|
||||
self.vars
|
||||
.get(&sym)
|
||||
.copied()
|
||||
.unwrap_or(ResourceLifecycle::empty())
|
||||
}
|
||||
|
||||
pub fn set(&mut self, sym: SymbolId, state: ResourceLifecycle) {
|
||||
self.vars.insert(sym, state);
|
||||
}
|
||||
}
|
||||
|
||||
/// Pointwise lifting of the [`ResourceLifecycle`] lattice to the variable map.
impl Lattice for ResourceDomainState {
    /// The state that tracks no variables.
    fn bot() -> Self {
        Self::new()
    }

    /// Key-wise join: every variable present in either side ends up in the
    /// result with the join of both lifecycle sets (a missing entry counts as
    /// the empty set).
    fn join(&self, other: &Self) -> Self {
        other
            .vars
            .iter()
            .fold(self.clone(), |mut merged, (&sym, incoming)| {
                let combined = merged.get(sym).join(incoming);
                merged.set(sym, combined);
                merged
            })
    }

    /// `true` when, for every variable tracked by `self`, its lifecycle set is
    /// below the one `other` reports for the same variable.
    fn leq(&self, other: &Self) -> bool {
        self.vars
            .iter()
            .all(|(&sym, lifecycle)| lifecycle.leq(&other.get(sym)))
    }
}
|
||||
|
||||
// ── AuthLevel ────────────────────────────────────────────────────────────
|
||||
|
||||
/// Simple ordered lattice for path authentication state.
|
||||
///
|
||||
/// Bot = `Unauthed`. Join = `min` (conservative: if any path is unauthed,
|
||||
/// the joined state is unauthed).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub enum AuthLevel {
|
||||
Unauthed,
|
||||
Authed,
|
||||
Admin,
|
||||
}
|
||||
|
||||
impl Lattice for AuthLevel {
|
||||
fn bot() -> Self {
|
||||
AuthLevel::Unauthed
|
||||
}
|
||||
|
||||
fn join(&self, other: &Self) -> Self {
|
||||
// Conservative: take the minimum (least privileged)
|
||||
(*self).min(*other)
|
||||
}
|
||||
|
||||
fn leq(&self, other: &Self) -> bool {
|
||||
// Higher auth subsumes lower: Unauthed ⊑ Authed ⊑ Admin
|
||||
// In our lattice, join = min, so leq means self >= other
|
||||
*self >= *other
|
||||
}
|
||||
}
|
||||
|
||||
// ── AuthDomainState ──────────────────────────────────────────────────────
|
||||
|
||||
/// Path auth level + per-variable validation bit.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct AuthDomainState {
|
||||
pub auth_level: AuthLevel,
|
||||
pub validated: HashSet<SymbolId>,
|
||||
}
|
||||
|
||||
impl Default for AuthDomainState {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
auth_level: AuthLevel::Unauthed,
|
||||
validated: HashSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AuthDomainState {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for AuthDomainState {
|
||||
fn bot() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
|
||||
fn join(&self, other: &Self) -> Self {
|
||||
Self {
|
||||
auth_level: self.auth_level.join(&other.auth_level),
|
||||
// Only validated on ALL paths counts
|
||||
validated: self
|
||||
.validated
|
||||
.intersection(&other.validated)
|
||||
.copied()
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
fn leq(&self, other: &Self) -> bool {
|
||||
self.auth_level.leq(&other.auth_level) && self.validated.is_superset(&other.validated)
|
||||
}
|
||||
}
|
||||
|
||||
// ── ProductState ─────────────────────────────────────────────────────────
|
||||
|
||||
/// Composable product of resource and auth domains.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct ProductState {
|
||||
pub resource: ResourceDomainState,
|
||||
pub auth: AuthDomainState,
|
||||
}
|
||||
|
||||
impl ProductState {
|
||||
pub fn initial() -> Self {
|
||||
Self {
|
||||
resource: ResourceDomainState::new(),
|
||||
auth: AuthDomainState::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for ProductState {
|
||||
fn bot() -> Self {
|
||||
Self {
|
||||
resource: ResourceDomainState::bot(),
|
||||
auth: AuthDomainState::bot(),
|
||||
}
|
||||
}
|
||||
|
||||
fn join(&self, other: &Self) -> Self {
|
||||
Self {
|
||||
resource: self.resource.join(&other.resource),
|
||||
auth: self.auth.join(&other.auth),
|
||||
}
|
||||
}
|
||||
|
||||
fn leq(&self, other: &Self) -> bool {
|
||||
self.resource.leq(&other.resource) && self.auth.leq(&other.auth)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn resource_lifecycle_join_is_or() {
|
||||
let a = ResourceLifecycle::OPEN;
|
||||
let b = ResourceLifecycle::CLOSED;
|
||||
assert_eq!(
|
||||
a.join(&b),
|
||||
ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resource_lifecycle_bot_identity() {
|
||||
let a = ResourceLifecycle::OPEN;
|
||||
assert_eq!(a.join(&ResourceLifecycle::bot()), a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resource_lifecycle_leq() {
|
||||
let a = ResourceLifecycle::OPEN;
|
||||
let b = ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED;
|
||||
assert!(a.leq(&b));
|
||||
assert!(!b.leq(&a));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resource_domain_join_merges_keys() {
|
||||
let mut a = ResourceDomainState::new();
|
||||
let mut b = ResourceDomainState::new();
|
||||
let sym_x = SymbolId(0);
|
||||
let sym_y = SymbolId(1);
|
||||
|
||||
a.set(sym_x, ResourceLifecycle::OPEN);
|
||||
b.set(sym_x, ResourceLifecycle::CLOSED);
|
||||
b.set(sym_y, ResourceLifecycle::OPEN);
|
||||
|
||||
let joined = a.join(&b);
|
||||
assert_eq!(
|
||||
joined.get(sym_x),
|
||||
ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED
|
||||
);
|
||||
assert_eq!(joined.get(sym_y), ResourceLifecycle::OPEN);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn auth_level_join_is_min() {
|
||||
assert_eq!(
|
||||
AuthLevel::Admin.join(&AuthLevel::Unauthed),
|
||||
AuthLevel::Unauthed
|
||||
);
|
||||
assert_eq!(AuthLevel::Authed.join(&AuthLevel::Admin), AuthLevel::Authed);
|
||||
assert_eq!(
|
||||
AuthLevel::Authed.join(&AuthLevel::Authed),
|
||||
AuthLevel::Authed
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn auth_domain_join_intersects_validated() {
|
||||
let sym_a = SymbolId(0);
|
||||
let sym_b = SymbolId(1);
|
||||
let sym_c = SymbolId(2);
|
||||
|
||||
let a = AuthDomainState {
|
||||
auth_level: AuthLevel::Authed,
|
||||
validated: [sym_a, sym_b].into_iter().collect(),
|
||||
};
|
||||
let b = AuthDomainState {
|
||||
auth_level: AuthLevel::Admin,
|
||||
validated: [sym_b, sym_c].into_iter().collect(),
|
||||
};
|
||||
|
||||
let joined = a.join(&b);
|
||||
assert_eq!(joined.auth_level, AuthLevel::Authed);
|
||||
assert_eq!(joined.validated, [sym_b].into_iter().collect());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn product_state_join() {
|
||||
let a = ProductState::initial();
|
||||
let b = ProductState::initial();
|
||||
let joined = a.join(&b);
|
||||
assert_eq!(joined, ProductState::initial());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn may_must_leak_semantics() {
|
||||
// Must-leak: OPEN only
|
||||
let must_leak = ResourceLifecycle::OPEN;
|
||||
assert!(must_leak.contains(ResourceLifecycle::OPEN));
|
||||
assert!(!must_leak.contains(ResourceLifecycle::CLOSED));
|
||||
assert!(!must_leak.contains(ResourceLifecycle::MOVED));
|
||||
|
||||
// May-leak: OPEN | CLOSED (some paths close, some don't)
|
||||
let may_leak = ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED;
|
||||
assert!(may_leak.contains(ResourceLifecycle::OPEN));
|
||||
assert!(may_leak.contains(ResourceLifecycle::CLOSED));
|
||||
|
||||
// No leak: CLOSED only
|
||||
let no_leak = ResourceLifecycle::CLOSED;
|
||||
assert!(!no_leak.contains(ResourceLifecycle::OPEN));
|
||||
assert!(no_leak.contains(ResourceLifecycle::CLOSED));
|
||||
}
|
||||
|
||||
// SymbolId is a newtype used in domain tests; ensure it's Copy
|
||||
#[test]
|
||||
fn symbol_id_is_copy() {
|
||||
let s = SymbolId(0);
|
||||
let s2 = s;
|
||||
assert_eq!(s, s2);
|
||||
}
|
||||
}
|
||||
288
src/state/engine.rs
Normal file
288
src/state/engine.rs
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
use super::lattice::Lattice;
|
||||
use crate::cfg::{Cfg, EdgeKind, NodeInfo};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use petgraph::visit::EdgeRef;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
|
||||
/// Maximum tracked variables per function (guarded degradation).
|
||||
pub const MAX_TRACKED_VARS: usize = 64;
|
||||
|
||||
/// Default worklist iteration budget.
|
||||
pub const MAX_WORKLIST_ITERATIONS: usize = 100_000;
|
||||
|
||||
/// Generic transfer function trait for forward dataflow analysis.
|
||||
///
|
||||
/// Domains implement this to define how abstract state flows through
|
||||
/// CFG nodes and what events (findings) are emitted.
|
||||
pub trait Transfer<S: Lattice> {
|
||||
/// Side-channel events emitted during transfer (e.g., findings, violations).
|
||||
type Event: Clone;
|
||||
|
||||
/// Apply the transfer function to a node, returning the output state
|
||||
/// and any events.
|
||||
fn apply(
|
||||
&self,
|
||||
node: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
edge: Option<EdgeKind>,
|
||||
state: S,
|
||||
) -> (S, Vec<Self::Event>);
|
||||
|
||||
/// Per-domain iteration budget. Defaults to [`MAX_WORKLIST_ITERATIONS`].
|
||||
fn iteration_budget(&self) -> usize {
|
||||
MAX_WORKLIST_ITERATIONS
|
||||
}
|
||||
|
||||
/// Called when the budget is exhausted. Returns true if the engine
|
||||
/// should continue with the current (non-converged) state, false to bail.
|
||||
fn on_budget_exceeded(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of running the forward dataflow engine.
|
||||
pub struct DataflowResult<S, E> {
|
||||
/// Converged state at the entry of each node.
|
||||
pub states: HashMap<NodeIndex, S>,
|
||||
/// Events emitted during Phase 2 transfer over converged states.
|
||||
pub events: Vec<E>,
|
||||
/// Whether the analysis converged (false if budget was hit).
|
||||
#[allow(dead_code)]
|
||||
pub converged: bool,
|
||||
}
|
||||
|
||||
/// Run a forward worklist dataflow analysis over the CFG.
|
||||
///
|
||||
/// Two-phase design:
|
||||
/// - Phase 1: fixed-point iteration to converge states (no event collection).
|
||||
/// - Phase 2: single pass over converged states to collect events.
|
||||
///
|
||||
/// Termination is guaranteed by lattice finiteness + iteration budget.
|
||||
pub fn run_forward<S: Lattice, T: Transfer<S>>(
|
||||
cfg: &Cfg,
|
||||
entry: NodeIndex,
|
||||
transfer: &T,
|
||||
initial: S,
|
||||
) -> DataflowResult<S, T::Event> {
|
||||
let mut states: HashMap<NodeIndex, S> = HashMap::new();
|
||||
let budget = transfer.iteration_budget();
|
||||
|
||||
// Initialize entry node
|
||||
states.insert(entry, initial);
|
||||
|
||||
// ── Phase 1: fixed-point iteration (compute converged states) ─────
|
||||
let mut worklist: VecDeque<NodeIndex> = VecDeque::new();
|
||||
worklist.push_back(entry);
|
||||
|
||||
let mut iterations: usize = 0;
|
||||
let mut converged = true;
|
||||
|
||||
while let Some(node) = worklist.pop_front() {
|
||||
iterations += 1;
|
||||
if iterations > budget {
|
||||
converged = !transfer.on_budget_exceeded();
|
||||
if !converged {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let node_state = match states.get(&node) {
|
||||
Some(s) => s.clone(),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
|
||||
|
||||
// No outgoing edges — nothing to propagate (exit/dead end).
|
||||
if edges.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (edge_kind, target) in edges {
|
||||
let info = &cfg[node];
|
||||
let (out_state, _events) =
|
||||
transfer.apply(node, info, Some(edge_kind), node_state.clone());
|
||||
|
||||
// Join into target's state
|
||||
let target_state = states.get(&target);
|
||||
let new_target = match target_state {
|
||||
Some(existing) => existing.join(&out_state),
|
||||
None => out_state,
|
||||
};
|
||||
|
||||
let changed = target_state.is_none_or(|existing| *existing != new_target);
|
||||
if changed {
|
||||
states.insert(target, new_target);
|
||||
if !worklist.contains(&target) {
|
||||
worklist.push_back(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Phase 2: single pass over converged states to collect events ──
|
||||
let mut events: Vec<T::Event> = Vec::new();
|
||||
let mut seen_edges: std::collections::HashSet<(NodeIndex, NodeIndex)> =
|
||||
std::collections::HashSet::new();
|
||||
|
||||
for node in states.keys().copied().collect::<Vec<_>>() {
|
||||
let node_state = match states.get(&node) {
|
||||
Some(s) => s.clone(),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
|
||||
|
||||
if edges.is_empty() {
|
||||
// Exit / dead end — apply transfer for event collection.
|
||||
let info = &cfg[node];
|
||||
let (_out_state, new_events) = transfer.apply(node, info, None, node_state);
|
||||
events.extend(new_events);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (edge_kind, target) in edges {
|
||||
if !seen_edges.insert((node, target)) {
|
||||
continue;
|
||||
}
|
||||
let info = &cfg[node];
|
||||
let (_out_state, new_events) =
|
||||
transfer.apply(node, info, Some(edge_kind), node_state.clone());
|
||||
events.extend(new_events);
|
||||
}
|
||||
}
|
||||
|
||||
DataflowResult {
|
||||
states,
|
||||
events,
|
||||
converged,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::cfg::{EdgeKind, NodeInfo, StmtKind};
|
||||
use crate::cfg_analysis::rules;
|
||||
use crate::state::domain::ResourceLifecycle;
|
||||
use crate::state::symbol::SymbolInterner;
|
||||
use crate::state::transfer::DefaultTransfer;
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::Graph;
|
||||
|
||||
fn make_node(kind: StmtKind) -> NodeInfo {
|
||||
NodeInfo {
|
||||
kind,
|
||||
span: (0, 0),
|
||||
label: None,
|
||||
defines: None,
|
||||
uses: vec![],
|
||||
callee: None,
|
||||
enclosing_func: None,
|
||||
call_ordinal: 0,
|
||||
condition_text: None,
|
||||
condition_vars: vec![],
|
||||
condition_negated: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn linear_cfg_converges() {
|
||||
use crate::state::domain::ProductState;
|
||||
|
||||
// Entry → fopen(f) → fclose(f) → Exit
|
||||
let mut cfg: Cfg = Graph::new();
|
||||
let entry = cfg.add_node(make_node(StmtKind::Entry));
|
||||
let open_node = cfg.add_node(NodeInfo {
|
||||
kind: StmtKind::Call,
|
||||
defines: Some("f".into()),
|
||||
callee: Some("fopen".into()),
|
||||
..make_node(StmtKind::Call)
|
||||
});
|
||||
let close_node = cfg.add_node(NodeInfo {
|
||||
kind: StmtKind::Call,
|
||||
uses: vec!["f".into()],
|
||||
callee: Some("fclose".into()),
|
||||
..make_node(StmtKind::Call)
|
||||
});
|
||||
let exit = cfg.add_node(make_node(StmtKind::Exit));
|
||||
|
||||
cfg.add_edge(entry, open_node, EdgeKind::Seq);
|
||||
cfg.add_edge(open_node, close_node, EdgeKind::Seq);
|
||||
cfg.add_edge(close_node, exit, EdgeKind::Seq);
|
||||
|
||||
let interner = SymbolInterner::from_cfg(&cfg);
|
||||
let transfer = DefaultTransfer {
|
||||
lang: Lang::C,
|
||||
resource_pairs: rules::resource_pairs(Lang::C),
|
||||
interner: &interner,
|
||||
};
|
||||
|
||||
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
|
||||
|
||||
// No events (clean open→close)
|
||||
assert!(result.events.is_empty());
|
||||
assert!(result.converged);
|
||||
|
||||
// At exit, f should be CLOSED
|
||||
let sym_f = interner.get("f").unwrap();
|
||||
let exit_state = result.states.get(&exit).unwrap();
|
||||
assert_eq!(exit_state.resource.get(sym_f), ResourceLifecycle::CLOSED);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn diamond_cfg_joins_states() {
|
||||
use crate::state::domain::ProductState;
|
||||
|
||||
// Entry
|
||||
// |
|
||||
// fopen(f)
|
||||
// |
|
||||
// If
|
||||
// / \
|
||||
// fclose(f) (no close)
|
||||
// \ /
|
||||
// Exit
|
||||
let mut cfg: Cfg = Graph::new();
|
||||
let entry = cfg.add_node(make_node(StmtKind::Entry));
|
||||
let open_node = cfg.add_node(NodeInfo {
|
||||
kind: StmtKind::Call,
|
||||
defines: Some("f".into()),
|
||||
callee: Some("fopen".into()),
|
||||
..make_node(StmtKind::Call)
|
||||
});
|
||||
let if_node = cfg.add_node(make_node(StmtKind::If));
|
||||
let close_node = cfg.add_node(NodeInfo {
|
||||
kind: StmtKind::Call,
|
||||
uses: vec!["f".into()],
|
||||
callee: Some("fclose".into()),
|
||||
..make_node(StmtKind::Call)
|
||||
});
|
||||
let no_close = cfg.add_node(make_node(StmtKind::Seq));
|
||||
let exit = cfg.add_node(make_node(StmtKind::Exit));
|
||||
|
||||
cfg.add_edge(entry, open_node, EdgeKind::Seq);
|
||||
cfg.add_edge(open_node, if_node, EdgeKind::Seq);
|
||||
cfg.add_edge(if_node, close_node, EdgeKind::True);
|
||||
cfg.add_edge(if_node, no_close, EdgeKind::False);
|
||||
cfg.add_edge(close_node, exit, EdgeKind::Seq);
|
||||
cfg.add_edge(no_close, exit, EdgeKind::Seq);
|
||||
|
||||
let interner = SymbolInterner::from_cfg(&cfg);
|
||||
let transfer = DefaultTransfer {
|
||||
lang: Lang::C,
|
||||
resource_pairs: rules::resource_pairs(Lang::C),
|
||||
interner: &interner,
|
||||
};
|
||||
|
||||
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
|
||||
|
||||
// At exit, f should be OPEN | CLOSED (may-leak)
|
||||
let sym_f = interner.get("f").unwrap();
|
||||
let exit_state = result.states.get(&exit).unwrap();
|
||||
assert_eq!(
|
||||
exit_state.resource.get(sym_f),
|
||||
ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED
|
||||
);
|
||||
}
|
||||
}
|
||||
355
src/state/facts.rs
Normal file
355
src/state/facts.rs
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
|
||||
use super::engine::DataflowResult;
|
||||
use super::symbol::SymbolInterner;
|
||||
use super::transfer::{TransferEvent, TransferEventKind};
|
||||
use crate::cfg::{Cfg, StmtKind};
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::patterns::Severity;
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::visit::IntoNodeReferences;
|
||||
|
||||
/// Normalize a callee description for display.
|
||||
fn sanitize_desc(s: &str) -> String {
|
||||
crate::fmt::normalize_snippet(s)
|
||||
}
|
||||
|
||||
/// A finding produced by state analysis.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StateFinding {
|
||||
pub rule_id: String,
|
||||
pub severity: Severity,
|
||||
pub span: (usize, usize),
|
||||
pub message: String,
|
||||
/// State machine that produced this finding: `"resource"` or `"auth"`.
|
||||
pub machine: &'static str,
|
||||
/// Variable name involved, if available.
|
||||
pub subject: Option<String>,
|
||||
/// State before the event (e.g. `"closed"`, `"open"`, `"unauthed"`).
|
||||
pub from_state: &'static str,
|
||||
/// State after the event (e.g. `"used"`, `"closed"`, `"leaked"`, `"access"`).
|
||||
pub to_state: &'static str,
|
||||
}
|
||||
|
||||
/// Extract findings from converged dataflow state + transfer events.
|
||||
pub fn extract_findings(
|
||||
result: &DataflowResult<ProductState, TransferEvent>,
|
||||
cfg: &Cfg,
|
||||
interner: &SymbolInterner,
|
||||
lang: Lang,
|
||||
func_summaries: &crate::cfg::FuncSummaries,
|
||||
) -> Vec<StateFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
// ── 1. Use-after-close from transfer events ──────────────────────────
|
||||
for event in &result.events {
|
||||
let info = &cfg[event.node];
|
||||
let var_name = interner.resolve(event.var);
|
||||
match event.kind {
|
||||
TransferEventKind::UseAfterClose => {
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-use-after-close".into(),
|
||||
severity: Severity::High,
|
||||
span: info.span,
|
||||
message: format!("variable `{var_name}` used after close"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "closed",
|
||||
to_state: "used",
|
||||
});
|
||||
}
|
||||
TransferEventKind::DoubleClose => {
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-double-close".into(),
|
||||
severity: Severity::Medium,
|
||||
span: info.span,
|
||||
message: format!("variable `{var_name}` closed twice"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "closed",
|
||||
to_state: "closed",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── 2. Resource leaks at Exit and function-Return nodes ──────────────
|
||||
for (idx, info) in cfg.node_references() {
|
||||
// Check both the file-level Exit node and the *synthesised* function
|
||||
// exit node (a Return node). Skip early-return nodes — they flow
|
||||
// into the synthesised exit and carry only path-specific state.
|
||||
// The synthesised exit is the one Return node that does NOT have an
|
||||
// outgoing edge to another Return in the same function.
|
||||
let is_exit = info.kind == StmtKind::Exit;
|
||||
let is_func_exit = info.kind == StmtKind::Return && info.enclosing_func.is_some();
|
||||
if !is_exit && !is_func_exit {
|
||||
continue;
|
||||
}
|
||||
if is_func_exit {
|
||||
use petgraph::Direction;
|
||||
let is_early_return = cfg
|
||||
.neighbors_directed(idx, Direction::Outgoing)
|
||||
.any(|succ| {
|
||||
let s = &cfg[succ];
|
||||
s.kind == StmtKind::Return && s.enclosing_func == info.enclosing_func
|
||||
});
|
||||
if is_early_return {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let Some(state) = result.states.get(&idx) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for (&sym, &lifecycle) in &state.resource.vars {
|
||||
if !lifecycle.contains(ResourceLifecycle::OPEN) {
|
||||
continue;
|
||||
}
|
||||
let var_name = interner.resolve(sym);
|
||||
|
||||
if !lifecycle.contains(ResourceLifecycle::CLOSED)
|
||||
&& !lifecycle.contains(ResourceLifecycle::MOVED)
|
||||
{
|
||||
// Definite leak: open on all paths, never closed
|
||||
// Find the acquire span by scanning backwards for this variable's define
|
||||
let acquire_span = find_acquire_span(cfg, sym, interner);
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-resource-leak".into(),
|
||||
severity: Severity::Medium,
|
||||
span: acquire_span.unwrap_or(info.span),
|
||||
message: format!("resource `{var_name}` is never closed"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "open",
|
||||
to_state: "leaked",
|
||||
});
|
||||
} else if lifecycle.contains(ResourceLifecycle::CLOSED) {
|
||||
// May-leak: open on some paths, closed on others
|
||||
let acquire_span = find_acquire_span(cfg, sym, interner);
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-resource-leak-possible".into(),
|
||||
severity: Severity::Low,
|
||||
span: acquire_span.unwrap_or(info.span),
|
||||
message: format!("resource `{var_name}` may not be closed on all paths"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "open",
|
||||
to_state: "possibly_leaked",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── 3. Auth-required sinks ───────────────────────────────────────────
|
||||
// Check if any function is a web entrypoint
|
||||
let has_web_entrypoint = cfg.node_references().any(|(_, info)| {
|
||||
if let Some(ref func_name) = info.enclosing_func {
|
||||
is_web_entrypoint_simple(func_name, lang, func_summaries, cfg)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
|
||||
if has_web_entrypoint {
|
||||
for (idx, info) in cfg.node_references() {
|
||||
if !is_privileged_sink(info) {
|
||||
continue;
|
||||
}
|
||||
let Some(state) = result.states.get(&idx) else {
|
||||
continue;
|
||||
};
|
||||
if state.auth.auth_level == AuthLevel::Unauthed {
|
||||
let callee_desc = sanitize_desc(info.callee.as_deref().unwrap_or("(sensitive op)"));
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-unauthed-access".into(),
|
||||
severity: Severity::High,
|
||||
span: info.span,
|
||||
message: format!(
|
||||
"sensitive operation `{callee_desc}` reached without authentication"
|
||||
),
|
||||
machine: "auth",
|
||||
subject: None,
|
||||
from_state: "unauthed",
|
||||
to_state: "access",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dedup
|
||||
findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
|
||||
findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
|
||||
|
||||
findings
|
||||
}
|
||||
|
||||
/// Find the span where a variable was acquired (defined via Call node).
|
||||
fn find_acquire_span(
|
||||
cfg: &Cfg,
|
||||
sym: super::symbol::SymbolId,
|
||||
interner: &SymbolInterner,
|
||||
) -> Option<(usize, usize)> {
|
||||
let var_name = interner.resolve(sym);
|
||||
for (_idx, info) in cfg.node_references() {
|
||||
if info.kind == StmtKind::Call
|
||||
&& let Some(ref def) = info.defines
|
||||
&& def == var_name
|
||||
{
|
||||
return Some(info.span);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Check if a node is a privileged sink (shell execution or file I/O).
|
||||
fn is_privileged_sink(info: &crate::cfg::NodeInfo) -> bool {
|
||||
match info.label {
|
||||
Some(DataLabel::Sink(caps)) => caps.intersects(Cap::SHELL_ESCAPE | Cap::FILE_IO),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Simplified web entrypoint check (avoids AnalysisContext dependency).
|
||||
fn is_web_entrypoint_simple(
|
||||
func_name: &str,
|
||||
lang: Lang,
|
||||
func_summaries: &crate::cfg::FuncSummaries,
|
||||
_cfg: &Cfg,
|
||||
) -> bool {
|
||||
let name_lower = func_name.to_ascii_lowercase();
|
||||
|
||||
// Skip bare "main" — it's typically a CLI entry
|
||||
if name_lower == "main" {
|
||||
return false;
|
||||
}
|
||||
|
||||
let is_handler_name = name_lower.starts_with("handle_")
|
||||
|| name_lower.starts_with("route_")
|
||||
|| name_lower.starts_with("api_")
|
||||
|| name_lower.starts_with("serve_")
|
||||
|| name_lower.starts_with("process_")
|
||||
|| name_lower == "handler";
|
||||
|
||||
if !is_handler_name {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for web-like parameters
|
||||
let web_params: &[&str] = match lang {
|
||||
Lang::Rust => &["request", "req", "json", "query", "form", "payload", "body"],
|
||||
Lang::JavaScript | Lang::TypeScript => &["req", "request", "ctx", "res", "response"],
|
||||
Lang::Python => &["request", "req"],
|
||||
Lang::Go => &["w", "writer", "r", "req", "request"],
|
||||
Lang::Java => &["request", "req"],
|
||||
_ => &["request", "req"],
|
||||
};
|
||||
|
||||
let has_web_params = func_summaries.values().any(|s| {
|
||||
s.param_names
|
||||
.iter()
|
||||
.any(|p| web_params.contains(&p.to_ascii_lowercase().as_str()))
|
||||
});
|
||||
|
||||
// Strong handler names are enough even without web params
|
||||
let strong_name = name_lower.starts_with("handle_")
|
||||
|| name_lower.starts_with("route_")
|
||||
|| name_lower.starts_with("api_");
|
||||
|
||||
has_web_params || strong_name
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{EdgeKind, NodeInfo};
    use crate::cfg_analysis::rules;
    use crate::state::domain::ProductState;
    use crate::state::engine;
    use crate::state::symbol::SymbolInterner;
    use crate::state::transfer::DefaultTransfer;
    use petgraph::Graph;
    use std::collections::HashMap;

    /// Node of the given kind with a zero span and every optional field empty.
    fn make_node(kind: StmtKind) -> NodeInfo {
        NodeInfo {
            kind,
            span: (0, 0),
            label: None,
            defines: None,
            uses: vec![],
            callee: None,
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Runs the forward engine over `cfg` with the C resource pairs and
    /// returns what `extract_findings` reports (no function summaries).
    fn c_findings(cfg: &Cfg, entry: petgraph::graph::NodeIndex) -> Vec<StateFinding> {
        let interner = SymbolInterner::from_cfg(cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
        };
        let result = engine::run_forward(cfg, entry, &transfer, ProductState::initial());
        extract_findings(&result, cfg, &interner, Lang::C, &HashMap::new())
    }

    #[test]
    fn detects_resource_leak() {
        // Entry → fopen(f) → Exit (no close)
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let opened = cfg.add_node(NodeInfo {
            span: (10, 20),
            defines: Some("f".into()),
            callee: Some("fopen".into()),
            ..make_node(StmtKind::Call)
        });
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        cfg.add_edge(entry, opened, EdgeKind::Seq);
        cfg.add_edge(opened, exit, EdgeKind::Seq);

        let findings = c_findings(&cfg, entry);

        assert_eq!(findings.len(), 1);
        let leak = &findings[0];
        assert_eq!(leak.rule_id, "state-resource-leak");
        assert!(leak.message.contains("f"));
    }

    #[test]
    fn clean_open_close_no_findings() {
        // Entry → fopen(f) → fclose(f) → Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let opened = cfg.add_node(NodeInfo {
            defines: Some("f".into()),
            callee: Some("fopen".into()),
            ..make_node(StmtKind::Call)
        });
        let closed = cfg.add_node(NodeInfo {
            uses: vec!["f".into()],
            callee: Some("fclose".into()),
            ..make_node(StmtKind::Call)
        });
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        cfg.add_edge(entry, opened, EdgeKind::Seq);
        cfg.add_edge(opened, closed, EdgeKind::Seq);
        cfg.add_edge(closed, exit, EdgeKind::Seq);

        assert!(c_findings(&cfg, entry).is_empty());
    }
}
|
||||
91
src/state/lattice.rs
Normal file
91
src/state/lattice.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
/// Join semi-lattice with a least element.
///
/// Every implementation is expected to uphold these laws:
/// - `join` is commutative, associative, and idempotent;
/// - `bot()` is the identity element of `join`;
/// - `a.leq(b)` holds exactly when `a.join(b) == b`.
#[allow(dead_code)]
pub trait Lattice: Clone + Eq + Sized {
    /// The bottom element (least information / unreachable).
    fn bot() -> Self;

    /// Least upper bound of `self` and `other`: merges two abstract values.
    fn join(&self, other: &Self) -> Self;

    /// Partial-order test: `self ⊑ other`.
    fn leq(&self, other: &Self) -> bool;
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-point chain (0 ⊑ 1 ⊑ 2) used to exercise the trait contract.
    #[derive(Clone, Debug, PartialEq, Eq)]
    struct Three(u8); // 0=bot, 1, 2=top-ish

    impl Lattice for Three {
        fn bot() -> Self {
            Self(0)
        }

        fn join(&self, other: &Self) -> Self {
            Self(std::cmp::max(self.0, other.0))
        }

        fn leq(&self, other: &Self) -> bool {
            other.0 >= self.0
        }
    }

    #[test]
    fn bot_identity() {
        let mid = Three(1);
        assert_eq!(mid.join(&Three::bot()), mid);
        assert_eq!(Three::bot().join(&mid), mid);
    }

    #[test]
    fn join_commutative() {
        let (lo, hi) = (Three(1), Three(2));
        assert_eq!(lo.join(&hi), hi.join(&lo));
    }

    #[test]
    fn join_associative() {
        let (x, y, z) = (Three(0), Three(1), Three(2));
        let left_first = x.join(&y).join(&z);
        let right_first = x.join(&y.join(&z));
        assert_eq!(left_first, right_first);
    }

    #[test]
    fn join_idempotent() {
        let mid = Three(1);
        assert_eq!(mid.join(&mid), mid);
    }

    #[test]
    fn leq_reflexive() {
        let mid = Three(1);
        assert!(mid.leq(&mid));
    }

    #[test]
    fn leq_transitive() {
        let (x, y, z) = (Three(0), Three(1), Three(2));
        assert!(x.leq(&y));
        assert!(y.leq(&z));
        assert!(x.leq(&z));
    }

    #[test]
    fn leq_consistent_with_join() {
        // lo ⊑ hi iff join(lo, hi) == hi
        let (lo, hi) = (Three(1), Three(2));
        assert!(lo.leq(&hi));
        assert_eq!(lo.join(&hi), hi);
    }
}
|
||||
62
src/state/mod.rs
Normal file
62
src/state/mod.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
pub mod domain;
|
||||
pub mod engine;
|
||||
pub mod facts;
|
||||
pub mod lattice;
|
||||
pub mod symbol;
|
||||
pub mod transfer;
|
||||
|
||||
use crate::cfg::{Cfg, FuncSummaries};
|
||||
use crate::cfg_analysis::rules;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use domain::ProductState;
|
||||
use engine::MAX_TRACKED_VARS;
|
||||
use facts::StateFinding;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use symbol::SymbolInterner;
|
||||
use transfer::DefaultTransfer;
|
||||
|
||||
/// Run state-model dataflow analysis on a single function's CFG.
|
||||
///
|
||||
/// Returns findings for use-after-close, double-close, resource leaks,
|
||||
/// and unauthenticated access to sensitive sinks.
|
||||
pub fn run_state_analysis(
|
||||
cfg: &Cfg,
|
||||
entry: NodeIndex,
|
||||
lang: Lang,
|
||||
_source_bytes: &[u8],
|
||||
func_summaries: &FuncSummaries,
|
||||
_global_summaries: Option<&GlobalSummaries>,
|
||||
) -> Vec<StateFinding> {
|
||||
let _span = tracing::debug_span!("run_state_analysis").entered();
|
||||
|
||||
// 1. Build symbol interner from CFG
|
||||
let interner = SymbolInterner::from_cfg(cfg);
|
||||
|
||||
// Guarded degradation: cap tracked variables
|
||||
if interner.len() > MAX_TRACKED_VARS {
|
||||
tracing::warn!(
|
||||
symbols = interner.len(),
|
||||
max = MAX_TRACKED_VARS,
|
||||
"state analysis: too many variables, capping tracking"
|
||||
);
|
||||
// Still run — the interner has all symbols, but transfer will only
|
||||
// track the first MAX_TRACKED_VARS due to HashMap insertion order.
|
||||
// This is conservative but safe.
|
||||
}
|
||||
|
||||
// 2. Construct transfer function
|
||||
let resource_pairs = rules::resource_pairs(lang);
|
||||
let transfer = DefaultTransfer {
|
||||
lang,
|
||||
resource_pairs,
|
||||
interner: &interner,
|
||||
};
|
||||
|
||||
// 3. Run forward dataflow engine
|
||||
let initial = ProductState::initial();
|
||||
let result = engine::run_forward(cfg, entry, &transfer, initial);
|
||||
|
||||
// 4. Extract findings
|
||||
facts::extract_findings(&result, cfg, &interner, lang, func_summaries)
|
||||
}
|
||||
101
src/state/symbol.rs
Normal file
101
src/state/symbol.rs
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
use crate::cfg::Cfg;
|
||||
use petgraph::visit::IntoNodeReferences;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Small `Copy` handle identifying an entry of a [`SymbolInterner`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SymbolId(pub(crate) u32);
|
||||
|
||||
/// Per-function interner: maps `String` ↔ [`SymbolId`].
|
||||
///
|
||||
/// Built once from CFG node `defines`/`uses`, reused throughout analysis.
|
||||
#[derive(Default)]
|
||||
pub struct SymbolInterner {
|
||||
to_id: HashMap<String, SymbolId>,
|
||||
to_str: Vec<String>,
|
||||
}
|
||||
|
||||
impl SymbolInterner {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Intern a name, returning its stable [`SymbolId`].
|
||||
pub fn intern(&mut self, name: &str) -> SymbolId {
|
||||
if let Some(&id) = self.to_id.get(name) {
|
||||
return id;
|
||||
}
|
||||
let id = SymbolId(self.to_str.len() as u32);
|
||||
self.to_str.push(name.to_owned());
|
||||
self.to_id.insert(name.to_owned(), id);
|
||||
id
|
||||
}
|
||||
|
||||
/// Look up a name without interning it.
|
||||
pub fn get(&self, name: &str) -> Option<SymbolId> {
|
||||
self.to_id.get(name).copied()
|
||||
}
|
||||
|
||||
/// Resolve an id back to its string.
|
||||
pub fn resolve(&self, id: SymbolId) -> &str {
|
||||
&self.to_str[id.0 as usize]
|
||||
}
|
||||
|
||||
/// Number of interned symbols.
|
||||
pub fn len(&self) -> usize {
|
||||
self.to_str.len()
|
||||
}
|
||||
|
||||
/// Whether the interner is empty.
|
||||
#[allow(dead_code)]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.to_str.is_empty()
|
||||
}
|
||||
|
||||
/// Build from a CFG: walk all nodes, intern every `defines`/`uses` string.
|
||||
pub fn from_cfg(cfg: &Cfg) -> Self {
|
||||
let mut interner = Self::new();
|
||||
for (_idx, info) in cfg.node_references() {
|
||||
if let Some(ref d) = info.defines {
|
||||
interner.intern(d);
|
||||
}
|
||||
for u in &info.uses {
|
||||
interner.intern(u);
|
||||
}
|
||||
}
|
||||
interner
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn intern_resolve_roundtrip() {
        let mut table = SymbolInterner::new();
        let foo = table.intern("foo");
        let bar = table.intern("bar");
        let foo_again = table.intern("foo");

        // Re-interning yields the same id; distinct names get distinct ids.
        assert_eq!(foo, foo_again);
        assert_ne!(foo, bar);
        assert_eq!(table.resolve(foo), "foo");
        assert_eq!(table.resolve(bar), "bar");
    }

    #[test]
    fn get_returns_none_for_unknown() {
        let table = SymbolInterner::new();
        assert!(table.get("missing").is_none());
    }

    #[test]
    fn len_tracks_unique_symbols() {
        let mut table = SymbolInterner::new();
        // "a" is interned twice but must be counted once.
        for name in ["a", "b", "a"].iter() {
            table.intern(name);
        }
        assert_eq!(table.len(), 2);
    }
}
|
||||
426
src/state/transfer.rs
Normal file
426
src/state/transfer.rs
Normal file
|
|
@ -0,0 +1,426 @@
|
|||
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
|
||||
use super::engine::Transfer;
|
||||
use super::symbol::{SymbolId, SymbolInterner};
|
||||
use crate::cfg::{EdgeKind, NodeInfo, StmtKind};
|
||||
use crate::cfg_analysis::rules::{self, ResourcePair};
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::graph::NodeIndex;
|
||||
|
||||
/// Events emitted during transfer for illegal state transitions.
|
||||
/// These are NOT lattice values — they become findings in `facts.rs`.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TransferEvent {
|
||||
pub kind: TransferEventKind,
|
||||
pub node: NodeIndex,
|
||||
pub var: SymbolId,
|
||||
}
|
||||
|
||||
/// Kinds of illegal resource transitions reported as [`TransferEvent`]s.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TransferEventKind {
    /// A resource-use call touched a variable whose state is `CLOSED`.
    UseAfterClose,
    /// A release call touched a variable whose state is already `CLOSED`.
    DoubleClose,
}
|
||||
|
||||
/// Callee patterns that count as *using* a resource handle (read, write,
/// operate on it). A match on a handle that is already closed triggers a
/// use-after-close event.
static RESOURCE_USE_PATTERNS: &[&str] = &[
    // Generic I/O
    "read",
    "write",
    "send",
    "recv",
    // C stdio
    "fread",
    "fwrite",
    "fgets",
    "fputs",
    "fprintf",
    "fscanf",
    "fflush",
    "fseek",
    "ftell",
    "rewind",
    "feof",
    "ferror",
    "fgetc",
    "fputc",
    "getc",
    "putc",
    "ungetc",
    // Database / network / descriptor operations
    "query",
    "execute",
    "fetch",
    "sendto",
    "recvfrom",
    "ioctl",
    "fcntl",
    // Memory access functions (for malloc/free use-after-free detection)
    "strcpy",
    "strncpy",
    "strcat",
    "strncat",
    "memcpy",
    "memmove",
    "memset",
    "memcmp",
    "strcmp",
    "strncmp",
    "strlen",
    "sprintf",
    "snprintf",
];
|
||||
|
||||
/// Patterns identifying auth checks that grant admin-level privilege.
static ADMIN_PATTERNS: &[&str] = &[
    "is_admin", "hasrole", "has_role", "check_admin", "require_admin",
];
|
||||
|
||||
pub struct DefaultTransfer<'a> {
|
||||
pub lang: Lang,
|
||||
pub resource_pairs: &'a [ResourcePair],
|
||||
pub interner: &'a SymbolInterner,
|
||||
}
|
||||
|
||||
impl Transfer<ProductState> for DefaultTransfer<'_> {
|
||||
type Event = TransferEvent;
|
||||
|
||||
fn apply(
|
||||
&self,
|
||||
node_idx: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
edge: Option<EdgeKind>,
|
||||
mut state: ProductState,
|
||||
) -> (ProductState, Vec<TransferEvent>) {
|
||||
let mut events = Vec::new();
|
||||
|
||||
match info.kind {
|
||||
StmtKind::Call => {
|
||||
self.apply_call(node_idx, info, &mut state, &mut events);
|
||||
}
|
||||
StmtKind::If => {
|
||||
self.apply_if(info, edge, &mut state);
|
||||
}
|
||||
StmtKind::Seq => {
|
||||
self.apply_assignment(node_idx, info, &mut state);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
(state, events)
|
||||
}
|
||||
}
|
||||
|
||||
impl DefaultTransfer<'_> {
|
||||
fn apply_call(
|
||||
&self,
|
||||
node_idx: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
state: &mut ProductState,
|
||||
events: &mut Vec<TransferEvent>,
|
||||
) {
|
||||
let callee = match &info.callee {
|
||||
Some(c) => c.to_ascii_lowercase(),
|
||||
None => return,
|
||||
};
|
||||
|
||||
// ── Resource acquire ─────────────────────────────────────────────
|
||||
for pair in self.resource_pairs {
|
||||
let is_acquire = pair.acquire.iter().any(|a| callee_matches(&callee, a));
|
||||
let is_excluded = pair
|
||||
.exclude_acquire
|
||||
.iter()
|
||||
.any(|e| callee_matches(&callee, e));
|
||||
|
||||
if is_acquire
|
||||
&& !is_excluded
|
||||
&& let Some(ref def) = info.defines
|
||||
&& let Some(sym) = self.interner.get(def)
|
||||
{
|
||||
state.resource.set(sym, ResourceLifecycle::OPEN);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Resource release ─────────────────────────────────────────────
|
||||
// Track which variables have already been released to avoid double-
|
||||
// matching across multiple resource pair definitions.
|
||||
let mut released: smallvec::SmallVec<[SymbolId; 4]> = smallvec::SmallVec::new();
|
||||
for pair in self.resource_pairs {
|
||||
let is_release = pair.release.iter().any(|r| callee_matches(&callee, r));
|
||||
if is_release {
|
||||
for used in &info.uses {
|
||||
if let Some(sym) = self.interner.get(used) {
|
||||
if released.contains(&sym) {
|
||||
continue;
|
||||
}
|
||||
let current = state.resource.get(sym);
|
||||
if current == ResourceLifecycle::CLOSED {
|
||||
// Double close
|
||||
events.push(TransferEvent {
|
||||
kind: TransferEventKind::DoubleClose,
|
||||
node: node_idx,
|
||||
var: sym,
|
||||
});
|
||||
} else if current.contains(ResourceLifecycle::OPEN) {
|
||||
state.resource.set(sym, ResourceLifecycle::CLOSED);
|
||||
}
|
||||
released.push(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Resource use (read/write/etc.) ───────────────────────────────
|
||||
let is_use = RESOURCE_USE_PATTERNS
|
||||
.iter()
|
||||
.any(|p| callee_matches(&callee, p));
|
||||
if is_use {
|
||||
for used in &info.uses {
|
||||
if let Some(sym) = self.interner.get(used) {
|
||||
let current = state.resource.get(sym);
|
||||
if current == ResourceLifecycle::CLOSED {
|
||||
events.push(TransferEvent {
|
||||
kind: TransferEventKind::UseAfterClose,
|
||||
node: node_idx,
|
||||
var: sym,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Auth call ────────────────────────────────────────────────────
|
||||
let auth_rules = rules::auth_rules(self.lang);
|
||||
let is_auth = auth_rules.iter().any(|rule| {
|
||||
rule.matchers
|
||||
.iter()
|
||||
.any(|m| callee_matches(&callee, &m.to_ascii_lowercase()))
|
||||
});
|
||||
if is_auth {
|
||||
let is_admin = ADMIN_PATTERNS.iter().any(|p| callee_matches(&callee, p));
|
||||
let new_level = if is_admin {
|
||||
AuthLevel::Admin
|
||||
} else {
|
||||
AuthLevel::Authed
|
||||
};
|
||||
if new_level > state.auth.auth_level {
|
||||
state.auth.auth_level = new_level;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Validation call (guard) ──────────────────────────────────────
|
||||
if is_guard_like(&callee) {
|
||||
for used in &info.uses {
|
||||
if let Some(sym) = self.interner.get(used) {
|
||||
state.auth.validated.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_if(&self, info: &NodeInfo, edge: Option<EdgeKind>, state: &mut ProductState) {
|
||||
// On the True edge of an If node whose condition is an auth check,
|
||||
// refine auth level.
|
||||
let is_true_edge = matches!(edge, Some(EdgeKind::True));
|
||||
if !is_true_edge {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(ref cond) = info.condition_text {
|
||||
let cond_lower = cond.to_ascii_lowercase();
|
||||
|
||||
// Auth-related condition
|
||||
let auth_rules = rules::auth_rules(self.lang);
|
||||
let is_auth_cond = auth_rules.iter().any(|rule| {
|
||||
rule.matchers
|
||||
.iter()
|
||||
.any(|m| cond_lower.contains(&m.to_ascii_lowercase()))
|
||||
});
|
||||
if is_auth_cond && !info.condition_negated {
|
||||
let is_admin = ADMIN_PATTERNS.iter().any(|p| cond_lower.contains(p));
|
||||
let new_level = if is_admin {
|
||||
AuthLevel::Admin
|
||||
} else {
|
||||
AuthLevel::Authed
|
||||
};
|
||||
if new_level > state.auth.auth_level {
|
||||
state.auth.auth_level = new_level;
|
||||
}
|
||||
}
|
||||
|
||||
// Validation-related condition
|
||||
if is_guard_like(&cond_lower) && !info.condition_negated {
|
||||
for var in &info.condition_vars {
|
||||
if let Some(sym) = self.interner.get(var) {
|
||||
state.auth.validated.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_assignment(&self, _node_idx: NodeIndex, info: &NodeInfo, state: &mut ProductState) {
|
||||
// Ownership transfer: if `defines` reassigns a tracked resource
|
||||
// variable from a `uses` variable, transfer the lifecycle.
|
||||
if let Some(ref def) = info.defines
|
||||
&& let Some(def_sym) = self.interner.get(def)
|
||||
{
|
||||
// If the RHS is a tracked resource, transfer its state
|
||||
for used in &info.uses {
|
||||
if let Some(use_sym) = self.interner.get(used) {
|
||||
let lc = state.resource.get(use_sym);
|
||||
if lc.contains(ResourceLifecycle::OPEN) {
|
||||
state.resource.set(def_sym, lc);
|
||||
state.resource.set(use_sym, ResourceLifecycle::MOVED);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a callee matches a pattern.
///
/// The test is a plain suffix match against the ASCII-lowercased pattern:
/// - `"fclose"` matches `"fclose"` and `"my_fclose"`;
/// - `".close"` matches `"file.close"`.
///
/// `callee` is compared as given (callers lowercase it beforehand); only the
/// pattern is lowercased here, byte by byte, so no temporary string is
/// allocated. The earlier version had separate "method" and "exact or
/// suffix" branches that both reduced to the same `ends_with` test.
///
/// NOTE(review): because this is a bare suffix match, short patterns also
/// match longer unrelated names (e.g. `"read"` matches `"thread"`).
fn callee_matches(callee: &str, pattern: &str) -> bool {
    let callee = callee.as_bytes();
    let pattern = pattern.as_bytes();
    if callee.len() < pattern.len() {
        return false;
    }
    // Comparing raw bytes is equivalent to `str::ends_with` and cannot split
    // a multi-byte character.
    let tail = &callee[callee.len() - pattern.len()..];
    tail.iter()
        .zip(pattern)
        .all(|(c, p)| *c == p.to_ascii_lowercase())
}
|
||||
|
||||
/// Report whether `callee` starts with one of the guard/validation prefixes
/// (`validate`, `sanitize`, `check_`, `verify_`, `assert_`).
fn is_guard_like(callee: &str) -> bool {
    const GUARD_PREFIXES: [&str; 5] = ["validate", "sanitize", "check_", "verify_", "assert_"];
    for prefix in GUARD_PREFIXES.iter() {
        if callee.starts_with(prefix) {
            return true;
        }
    }
    false
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Call node for `callee` with a zero span; tests override `span` via
    /// struct-update syntax where the original literal used a different one.
    fn call_info(callee: &str, defines: Option<&str>, uses: &[&str]) -> NodeInfo {
        NodeInfo {
            kind: StmtKind::Call,
            span: (0, 0),
            label: None,
            defines: defines.map(Into::into),
            uses: uses.iter().map(|u| (*u).into()).collect(),
            callee: Some(callee.into()),
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Transfer function configured with the C resource pairs.
    fn c_transfer(interner: &SymbolInterner) -> DefaultTransfer<'_> {
        DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner,
        }
    }

    /// Initial state with `sym` preset to `lifecycle`.
    fn state_with(sym: SymbolId, lifecycle: ResourceLifecycle) -> ProductState {
        let mut state = ProductState::initial();
        state.resource.set(sym, lifecycle);
        state
    }

    #[test]
    fn callee_matches_exact() {
        assert!(callee_matches("fopen", "fopen"));
        assert!(!callee_matches("fopen", "fclose"));
    }

    #[test]
    fn callee_matches_suffix() {
        assert!(callee_matches("curlx_fclose", "fclose"));
    }

    #[test]
    fn callee_matches_dot_prefix() {
        assert!(callee_matches("file.close", ".close"));
        assert!(!callee_matches("file.close", ".open"));
    }

    #[test]
    fn acquire_sets_open() {
        let mut interner = SymbolInterner::new();
        let sym_f = interner.intern("f");
        let transfer = c_transfer(&interner);

        let info = NodeInfo {
            span: (0, 10),
            ..call_info("fopen", Some("f"), &[])
        };

        let (state, events) =
            transfer.apply(NodeIndex::new(0), &info, None, ProductState::initial());
        assert!(events.is_empty());
        assert_eq!(state.resource.get(sym_f), ResourceLifecycle::OPEN);
    }

    #[test]
    fn close_after_open_sets_closed() {
        let mut interner = SymbolInterner::new();
        let sym_f = interner.intern("f");
        let transfer = c_transfer(&interner);

        let before = state_with(sym_f, ResourceLifecycle::OPEN);
        let info = NodeInfo {
            span: (10, 20),
            ..call_info("fclose", None, &["f"])
        };

        let (after, events) = transfer.apply(NodeIndex::new(1), &info, None, before);
        assert!(events.is_empty());
        assert_eq!(after.resource.get(sym_f), ResourceLifecycle::CLOSED);
    }

    #[test]
    fn double_close_emits_event() {
        let mut interner = SymbolInterner::new();
        let sym_f = interner.intern("f");
        let transfer = c_transfer(&interner);

        let before = state_with(sym_f, ResourceLifecycle::CLOSED);
        let info = NodeInfo {
            span: (20, 30),
            ..call_info("fclose", None, &["f"])
        };

        let (_after, events) = transfer.apply(NodeIndex::new(2), &info, None, before);
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].kind, TransferEventKind::DoubleClose);
        assert_eq!(events[0].var, sym_f);
    }

    #[test]
    fn use_after_close_emits_event() {
        let mut interner = SymbolInterner::new();
        let sym_f = interner.intern("f");
        let transfer = c_transfer(&interner);

        let before = state_with(sym_f, ResourceLifecycle::CLOSED);
        let info = NodeInfo {
            span: (30, 40),
            ..call_info("fread", None, &["f"])
        };

        let (_after, events) = transfer.apply(NodeIndex::new(3), &info, None, before);
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].kind, TransferEventKind::UseAfterClose);
    }

    #[test]
    fn is_guard_like_check() {
        assert!(is_guard_like("validate_input"));
        assert!(is_guard_like("sanitize_html"));
        assert!(is_guard_like("check_permission"));
        assert!(!is_guard_like("open_file"));
    }
}
|
||||
|
|
@ -139,6 +139,22 @@ impl FuncSummary {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Callee resolution ────────────────────────────────────────────────────
|
||||
|
||||
/// Result of resolving a bare callee name to a [`FuncKey`].
|
||||
///
|
||||
/// Three-valued: the call graph builder and taint engine need to distinguish
|
||||
/// "no candidates at all" from "multiple candidates, can't pick one".
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum CalleeResolution {
|
||||
/// Exactly one candidate matched.
|
||||
Resolved(FuncKey),
|
||||
/// No candidates found at all.
|
||||
NotFound,
|
||||
/// Multiple candidates — ambiguous, cannot pick one.
|
||||
Ambiguous(Vec<FuncKey>),
|
||||
}
|
||||
|
||||
// ── Lookup map used by the taint engine ─────────────────────────────────
|
||||
|
||||
/// A merged view of all function summaries keyed by qualified [`FuncKey`].
|
||||
|
|
@ -216,16 +232,66 @@ impl GlobalSummaries {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[allow(dead_code)] // used by tests and future call-graph consumers
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.by_key.is_empty()
|
||||
}
|
||||
|
||||
/// Iterate over all (key, summary) pairs.
|
||||
#[allow(dead_code)]
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&FuncKey, &FuncSummary)> {
|
||||
self.by_key.iter()
|
||||
}
|
||||
|
||||
/// Resolve a bare (already-normalized) callee name to a [`FuncKey`].
|
||||
///
|
||||
/// Resolution order:
|
||||
/// 1. Collect all same-language candidates matching the name.
|
||||
/// 2. If `arity_hint` is `Some`, filter candidates by matching arity.
|
||||
/// 3. If exactly one candidate → [`CalleeResolution::Resolved`].
|
||||
/// 4. If multiple, filter by `caller_namespace`; if exactly one → `Resolved`.
|
||||
/// 5. If still multiple → [`CalleeResolution::Ambiguous`].
|
||||
/// 6. If zero candidates → [`CalleeResolution::NotFound`].
|
||||
pub fn resolve_callee_key(
|
||||
&self,
|
||||
callee: &str,
|
||||
caller_lang: Lang,
|
||||
caller_namespace: &str,
|
||||
arity_hint: Option<usize>,
|
||||
) -> CalleeResolution {
|
||||
let candidates = self.lookup_same_lang(caller_lang, callee);
|
||||
if candidates.is_empty() {
|
||||
return CalleeResolution::NotFound;
|
||||
}
|
||||
|
||||
// Apply arity filter if hint provided.
|
||||
let filtered: Vec<&FuncKey> = if let Some(arity) = arity_hint {
|
||||
candidates
|
||||
.iter()
|
||||
.filter(|(k, _)| k.arity == Some(arity))
|
||||
.map(|(k, _)| *k)
|
||||
.collect()
|
||||
} else {
|
||||
candidates.iter().map(|(k, _)| *k).collect()
|
||||
};
|
||||
|
||||
match filtered.len() {
|
||||
0 => CalleeResolution::NotFound,
|
||||
1 => CalleeResolution::Resolved(filtered[0].clone()),
|
||||
_ => {
|
||||
// Namespace disambiguation: prefer same-namespace match.
|
||||
let same_ns: Vec<&FuncKey> = filtered
|
||||
.iter()
|
||||
.filter(|k| k.namespace == caller_namespace)
|
||||
.copied()
|
||||
.collect();
|
||||
match same_ns.len() {
|
||||
1 => CalleeResolution::Resolved(same_ns[0].clone()),
|
||||
0 => CalleeResolution::Ambiguous(filtered.into_iter().cloned().collect()),
|
||||
_ => CalleeResolution::Ambiguous(same_ns.into_iter().cloned().collect()),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for GlobalSummaries {
|
||||
|
|
|
|||
715
src/suppress/mod.rs
Normal file
715
src/suppress/mod.rs
Normal file
|
|
@ -0,0 +1,715 @@
|
|||
//! Inline per-finding suppression via source-code comments.
|
||||
//!
|
||||
//! Supports two directive forms:
|
||||
//! - `nyx:ignore <RULE_ID>[, <RULE_ID>…]` — suppress findings on the same line
|
||||
//! - `nyx:ignore-next-line <RULE_ID>[, …]` — suppress findings on the next line
|
||||
//!
|
||||
//! Comments are detected for all supported languages without tree-sitter,
|
||||
//! using a lightweight string/comment state machine.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Public types
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Whether the directive suppresses on its own line or the next line.
|
||||
#[derive(Debug, Clone, serde::Serialize)]
|
||||
pub enum SuppressionKind {
|
||||
SameLine,
|
||||
NextLine,
|
||||
}
|
||||
|
||||
/// Metadata attached to a suppressed finding.
|
||||
#[derive(Debug, Clone, serde::Serialize)]
|
||||
pub struct SuppressionMeta {
|
||||
pub kind: SuppressionKind,
|
||||
/// The pattern that matched the finding's rule ID.
|
||||
pub matched_pattern: String,
|
||||
/// 1-indexed line where the suppression directive appears.
|
||||
pub directive_line: usize,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Internal types
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// One rule pattern from a directive: an exact ID or a `foo.*` wildcard.
#[derive(Debug)]
enum RuleMatcher {
    /// Matches a rule ID equal to the stored string.
    Exact(String),
    /// Stores the part of the pattern before the trailing `.*`.
    WildcardSuffix(String),
}

impl RuleMatcher {
    /// Whether `rule_id` is covered by this matcher.
    ///
    /// A wildcard matches when `rule_id` is the stored prefix followed by a
    /// `.` (and possibly more); the bare prefix on its own does not match.
    fn matches(&self, rule_id: &str) -> bool {
        match self {
            RuleMatcher::Exact(expected) => expected == rule_id,
            RuleMatcher::WildcardSuffix(prefix) => rule_id
                .strip_prefix(prefix.as_str())
                .map_or(false, |rest| rest.starts_with('.')),
        }
    }
}
|
||||
|
||||
/// A parsed directive from a single comment.
|
||||
#[derive(Debug)]
|
||||
struct LineDirective {
|
||||
kind: SuppressionKind,
|
||||
/// 1-indexed line where the directive comment appears.
|
||||
directive_line: usize,
|
||||
matchers: Vec<RuleMatcher>,
|
||||
}
|
||||
|
||||
/// Pre-built index of suppression directives keyed by **target line** (the
|
||||
/// line whose findings should be suppressed, 1-indexed).
|
||||
pub struct SuppressionIndex {
|
||||
directives: HashMap<usize, Vec<LineDirective>>,
|
||||
}
|
||||
|
||||
impl SuppressionIndex {
|
||||
/// Check whether a finding at `line` (1-indexed) with `rule_id` is suppressed.
|
||||
pub fn check(&self, line: usize, rule_id: &str) -> Option<SuppressionMeta> {
|
||||
let canon = canonical_rule_id(rule_id);
|
||||
let dirs = self.directives.get(&line)?;
|
||||
for dir in dirs {
|
||||
for m in &dir.matchers {
|
||||
if m.matches(canon) {
|
||||
let display_pattern = match m {
|
||||
RuleMatcher::Exact(s) => s.clone(),
|
||||
RuleMatcher::WildcardSuffix(s) => format!("{s}.*"),
|
||||
};
|
||||
return Some(SuppressionMeta {
|
||||
kind: dir.kind.clone(),
|
||||
matched_pattern: display_pattern,
|
||||
directive_line: dir.directive_line,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns `true` if no directives were found.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.directives.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Canonical rule ID
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Reduce a rule ID to its canonical form by trimming surrounding whitespace
/// and dropping everything from the first `" ("` onwards:
/// `"taint-unsanitised-flow (source 5:1)"` → `"taint-unsanitised-flow"`.
pub fn canonical_rule_id(id: &str) -> &str {
    let id = id.trim();
    match id.split_once(" (") {
        Some((head, _suffix)) => head.trim_end(),
        None => id,
    }
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Comment style per language
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Comment syntax family used when scanning a file for directives.
#[derive(Copy, Clone)]
enum CommentStyle {
    /// `//` and `/* */` — Rust, C, C++, Java, Go, JS, TS
    CStyle,
    /// `#` only — Python, Ruby
    Hash,
    /// `//`, `#`, and `/* */` — PHP
    PhpStyle,
}
|
||||
|
||||
/// Map a file extension to the comment style for that language.
|
||||
fn comment_style_for_ext(ext: &str) -> Option<CommentStyle> {
|
||||
match ext {
|
||||
"rs" | "c" | "cpp" | "java" | "go" | "ts" | "js" => Some(CommentStyle::CStyle),
|
||||
"py" | "rb" => Some(CommentStyle::Hash),
|
||||
"php" => Some(CommentStyle::PhpStyle),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a file path to its comment style by inspecting the extension.
|
||||
fn comment_style_for_path(path: &std::path::Path) -> Option<CommentStyle> {
|
||||
let ext = path.extension().and_then(|s| s.to_str())?;
|
||||
// Normalise common variant extensions
|
||||
let norm = match ext {
|
||||
"RS" => "rs",
|
||||
"c++" => "cpp",
|
||||
"PY" => "py",
|
||||
"TSX" | "tsx" => "ts",
|
||||
other => other,
|
||||
};
|
||||
comment_style_for_ext(norm)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Parser
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Parse inline suppression directives from `source`, using comment syntax
|
||||
/// appropriate for the given file path.
|
||||
///
|
||||
/// Returns an empty index if the source doesn't contain `nyx:ignore` or the
|
||||
/// language is unsupported.
|
||||
pub fn parse_inline_suppressions(path: &std::path::Path, source: &str) -> SuppressionIndex {
|
||||
// Fast path: no directives possible.
|
||||
if !source.as_bytes().windows(10).any(|w| w == b"nyx:ignore") {
|
||||
return SuppressionIndex {
|
||||
directives: HashMap::new(),
|
||||
};
|
||||
}
|
||||
|
||||
let Some(style) = comment_style_for_path(path) else {
|
||||
return SuppressionIndex {
|
||||
directives: HashMap::new(),
|
||||
};
|
||||
};
|
||||
|
||||
let mut index: HashMap<usize, Vec<LineDirective>> = HashMap::new();
|
||||
let total_lines = source.lines().count();
|
||||
|
||||
// State machine for string/comment tracking.
|
||||
let mut in_block_comment = false;
|
||||
let mut block_comment_start_line: usize = 0;
|
||||
|
||||
for (line_idx, raw_line) in source.lines().enumerate() {
|
||||
let line_num = line_idx + 1; // 1-indexed
|
||||
let line = raw_line.trim_end_matches('\r');
|
||||
|
||||
if in_block_comment {
|
||||
// Check for block comment end.
|
||||
if let Some(end_pos) = line.find("*/") {
|
||||
// Extract text before `*/` — may contain a directive.
|
||||
let block_text = &line[..end_pos];
|
||||
if let Some(dir) = try_parse_directive(block_text, line_num) {
|
||||
let target = target_line(&dir, line_num, total_lines);
|
||||
if let Some(t) = target {
|
||||
index.entry(t).or_default().push(dir);
|
||||
}
|
||||
}
|
||||
in_block_comment = false;
|
||||
// After the block comment ends, check the rest of the line
|
||||
// for a line comment.
|
||||
let rest = &line[end_pos + 2..];
|
||||
if let Some(dir) = extract_from_line_rest(rest, line_num, style) {
|
||||
let target = target_line(&dir, line_num, total_lines);
|
||||
if let Some(t) = target {
|
||||
index.entry(t).or_default().push(dir);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Still inside block comment — check for directive.
|
||||
if let Some(dir) = try_parse_directive(line, line_num) {
|
||||
let target = target_line(&dir, line_num, total_lines);
|
||||
if let Some(t) = target {
|
||||
index.entry(t).or_default().push(dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
let _ = block_comment_start_line; // suppress unused warning
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not in a block comment — scan the line character by character
|
||||
// tracking string state.
|
||||
if let Some(dir) = scan_line_for_directive(line, line_num, style, &mut in_block_comment) {
|
||||
let target = target_line(&dir, line_num, total_lines);
|
||||
if let Some(t) = target {
|
||||
index.entry(t).or_default().push(dir);
|
||||
}
|
||||
}
|
||||
if in_block_comment {
|
||||
block_comment_start_line = line_num;
|
||||
}
|
||||
}
|
||||
|
||||
SuppressionIndex { directives: index }
|
||||
}
|
||||
|
||||
/// Compute the target line for a directive. Returns `None` if the directive
|
||||
/// is `NextLine` but on the last line (EOF — no-op).
|
||||
fn target_line(dir: &LineDirective, line_num: usize, total_lines: usize) -> Option<usize> {
|
||||
match dir.kind {
|
||||
SuppressionKind::SameLine => Some(line_num),
|
||||
SuppressionKind::NextLine => {
|
||||
if line_num < total_lines {
|
||||
Some(line_num + 1)
|
||||
} else {
|
||||
None // EOF — no next line
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan a single line (not inside a block comment) for a suppression directive.
|
||||
/// Tracks string literals to avoid false positives.
|
||||
///
|
||||
/// Sets `in_block_comment` to `true` if the line opens a `/* */` block that
|
||||
/// doesn't close on the same line.
|
||||
fn scan_line_for_directive(
|
||||
line: &str,
|
||||
line_num: usize,
|
||||
style: CommentStyle,
|
||||
in_block_comment: &mut bool,
|
||||
) -> Option<LineDirective> {
|
||||
let bytes = line.as_bytes();
|
||||
let len = bytes.len();
|
||||
let mut i = 0;
|
||||
|
||||
// String state
|
||||
let mut in_string: Option<u8> = None; // quote char: b'"', b'\'', b'`'
|
||||
|
||||
while i < len {
|
||||
let ch = bytes[i];
|
||||
|
||||
// ── Inside a string literal ─────────────────────────────────────
|
||||
if let Some(quote) = in_string {
|
||||
if ch == b'\\' {
|
||||
i += 2; // skip escaped char
|
||||
continue;
|
||||
}
|
||||
// Python triple quotes
|
||||
if (quote == b'"' || quote == b'\'')
|
||||
&& i + 2 < len
|
||||
&& bytes[i] == quote
|
||||
&& bytes[i + 1] == quote
|
||||
&& bytes[i + 2] == quote
|
||||
{
|
||||
// Check if this is a triple-quote close
|
||||
// (we entered via triple-quote open, but we track single quote char)
|
||||
in_string = None;
|
||||
i += 3;
|
||||
continue;
|
||||
}
|
||||
if ch == quote {
|
||||
in_string = None;
|
||||
}
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── Not in a string ─────────────────────────────────────────────
|
||||
|
||||
// Rust raw strings: r"..." or r#"..."#
|
||||
if ch == b'r' && i + 1 < len {
|
||||
let next = bytes[i + 1];
|
||||
if next == b'"' {
|
||||
// r"..." — skip to closing "
|
||||
i += 2;
|
||||
while i < len && bytes[i] != b'"' {
|
||||
i += 1;
|
||||
}
|
||||
i += 1; // skip closing "
|
||||
continue;
|
||||
}
|
||||
if next == b'#' {
|
||||
// Count hashes
|
||||
let hash_start = i + 1;
|
||||
let mut j = i + 1;
|
||||
while j < len && bytes[j] == b'#' {
|
||||
j += 1;
|
||||
}
|
||||
let hash_count = j - hash_start;
|
||||
if j < len && bytes[j] == b'"' {
|
||||
// Skip to closing "###
|
||||
let close_pat_len = 1 + hash_count; // " + hashes
|
||||
i = j + 1;
|
||||
'raw: while i < len {
|
||||
if bytes[i] == b'"' {
|
||||
// Check for matching hashes
|
||||
let mut k = 1;
|
||||
while k <= hash_count && i + k < len && bytes[i + k] == b'#' {
|
||||
k += 1;
|
||||
}
|
||||
if k > hash_count {
|
||||
i += close_pat_len;
|
||||
break 'raw;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Python triple quotes: """ or '''
|
||||
if (ch == b'"' || ch == b'\'') && i + 2 < len && bytes[i + 1] == ch && bytes[i + 2] == ch {
|
||||
in_string = Some(ch);
|
||||
i += 3;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Regular string literals
|
||||
if ch == b'"' || ch == b'\'' || ch == b'`' {
|
||||
in_string = Some(ch);
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── Comment detection ───────────────────────────────────────────
|
||||
|
||||
// C-style line comment: //
|
||||
let has_slash_slash = matches!(style, CommentStyle::CStyle | CommentStyle::PhpStyle);
|
||||
if has_slash_slash && ch == b'/' && i + 1 < len && bytes[i + 1] == b'/' {
|
||||
let comment_body = &line[i + 2..];
|
||||
return try_parse_directive(comment_body, line_num);
|
||||
}
|
||||
|
||||
// Block comment: /*
|
||||
let has_block = matches!(style, CommentStyle::CStyle | CommentStyle::PhpStyle);
|
||||
if has_block && ch == b'/' && i + 1 < len && bytes[i + 1] == b'*' {
|
||||
// Look for closing */ on the same line
|
||||
let rest = &line[i + 2..];
|
||||
if let Some(end) = rest.find("*/") {
|
||||
let block_body = &rest[..end];
|
||||
// Check directive in block body
|
||||
if let Some(dir) = try_parse_directive(block_body, line_num) {
|
||||
return Some(dir);
|
||||
}
|
||||
// Continue scanning after the block
|
||||
i = i + 2 + end + 2;
|
||||
continue;
|
||||
} else {
|
||||
// Block comment extends to next line(s)
|
||||
*in_block_comment = true;
|
||||
let block_body = rest;
|
||||
return try_parse_directive(block_body, line_num);
|
||||
}
|
||||
}
|
||||
|
||||
// Hash comment: #
|
||||
let has_hash = matches!(style, CommentStyle::Hash | CommentStyle::PhpStyle);
|
||||
if has_hash && ch == b'#' {
|
||||
let comment_body = &line[i + 1..];
|
||||
return try_parse_directive(comment_body, line_num);
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Try to extract a directive from a line rest (after a block comment closes).
|
||||
fn extract_from_line_rest(
|
||||
rest: &str,
|
||||
line_num: usize,
|
||||
style: CommentStyle,
|
||||
) -> Option<LineDirective> {
|
||||
let mut in_block = false;
|
||||
scan_line_for_directive(rest, line_num, style, &mut in_block)
|
||||
}
|
||||
|
||||
/// Try to parse a `nyx:ignore` or `nyx:ignore-next-line` directive from
|
||||
/// comment body text. Returns `None` if no directive is found.
|
||||
fn try_parse_directive(text: &str, line_num: usize) -> Option<LineDirective> {
|
||||
let trimmed = text.trim();
|
||||
// Strip leading `*` or `* ` common in block comments (e.g. ` * nyx:ignore ...`).
|
||||
let trimmed = trimmed
|
||||
.strip_prefix("* ")
|
||||
.or(trimmed.strip_prefix('*'))
|
||||
.unwrap_or(trimmed)
|
||||
.trim();
|
||||
|
||||
// Check for `nyx:ignore-next-line` first (longer prefix wins).
|
||||
if let Some(rest) = strip_directive_prefix(trimmed, "nyx:ignore-next-line") {
|
||||
let matchers = parse_rule_ids(rest);
|
||||
if matchers.is_empty() {
|
||||
return None;
|
||||
}
|
||||
return Some(LineDirective {
|
||||
kind: SuppressionKind::NextLine,
|
||||
directive_line: line_num,
|
||||
matchers,
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(rest) = strip_directive_prefix(trimmed, "nyx:ignore") {
|
||||
let matchers = parse_rule_ids(rest);
|
||||
if matchers.is_empty() {
|
||||
return None;
|
||||
}
|
||||
return Some(LineDirective {
|
||||
kind: SuppressionKind::SameLine,
|
||||
directive_line: line_num,
|
||||
matchers,
|
||||
});
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Remove `prefix` from the start of `text` when it appears as a whole word.
///
/// The prefix counts as matched only if it is followed by whitespace or by
/// the end of the string; the returned slice is whatever comes after the
/// prefix (including that leading whitespace). So `"nyx:ignore"` does not
/// match the text `"nyx:ignore-next-line …"`.
fn strip_directive_prefix<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    text.strip_prefix(prefix)
        .filter(|tail| tail.chars().next().map_or(true, char::is_whitespace))
}
|
||||
|
||||
/// Parse comma-separated rule IDs into matchers.
|
||||
fn parse_rule_ids(text: &str) -> Vec<RuleMatcher> {
|
||||
text.split(',')
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| {
|
||||
if let Some(prefix) = s.strip_suffix(".*") {
|
||||
RuleMatcher::WildcardSuffix(prefix.to_string())
|
||||
} else {
|
||||
RuleMatcher::Exact(s.to_string())
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tests
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    fn rust_path() -> &'static Path {
        Path::new("test.rs")
    }
    fn py_path() -> &'static Path {
        Path::new("test.py")
    }
    fn rb_path() -> &'static Path {
        Path::new("test.rb")
    }
    fn php_path() -> &'static Path {
        Path::new("test.php")
    }
    fn js_path() -> &'static Path {
        Path::new("test.js")
    }

    // 1. `//` comment parsing
    #[test]
    fn slash_slash_comment_suppresses() {
        let src = "let x = 1; // nyx:ignore rule.a\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_some());
        assert!(idx.check(1, "rule.b").is_none());
    }

    // 2. `#` comment parsing
    #[test]
    fn hash_comment_suppresses() {
        let src = "x = 1  # nyx:ignore rule.a\n";
        let idx = parse_inline_suppressions(py_path(), src);
        assert!(idx.check(1, "rule.a").is_some());
    }

    // 3. `/* */` block comment
    #[test]
    fn block_comment_suppresses() {
        let src = "let x = 1; /* nyx:ignore rule.a */\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_some());
    }

    // 4. Same-line semantics
    #[test]
    fn same_line_only_suppresses_own_line() {
        let src = "line1\nlet x = 1; // nyx:ignore rule.a\nline3\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_none());
        assert!(idx.check(2, "rule.a").is_some());
        assert!(idx.check(3, "rule.a").is_none());
    }

    // 5. Next-line semantics
    #[test]
    fn next_line_suppresses_following_line() {
        let src = "// nyx:ignore-next-line rule.a\nlet x = dangerous();\nline3\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_none());
        assert!(idx.check(2, "rule.a").is_some());
        assert!(idx.check(3, "rule.a").is_none());
    }

    // 6. Multiple rule IDs
    #[test]
    fn multiple_rule_ids() {
        let src = "let x = 1; // nyx:ignore a.b.c, x.y.z\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "a.b.c").is_some());
        assert!(idx.check(1, "x.y.z").is_some());
        assert!(idx.check(1, "other").is_none());
    }

    // 7. Wildcard suffix
    #[test]
    fn wildcard_suffix_matching() {
        let src = "let x = 1; // nyx:ignore rs.quality.*\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rs.quality.foo").is_some());
        assert!(idx.check(1, "rs.quality.bar").is_some());
        assert!(idx.check(1, "rs.other.foo").is_none());
        // Exact match of prefix without the dot should not match
        assert!(idx.check(1, "rs.quality").is_none());
    }

    // 8. String literal guard
    #[test]
    fn string_literal_not_suppressed() {
        let src = "let x = \"// nyx:ignore rule.a\";\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_none());
    }

    // 9. Rust raw string guard
    #[test]
    fn rust_raw_string_not_suppressed() {
        let src = "let x = r#\"// nyx:ignore rule.a\"#;\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_none());
    }

    // 10. Rule ID mismatch
    #[test]
    fn rule_id_mismatch() {
        let src = "let x = 1; // nyx:ignore rule-a\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule-a").is_some());
        assert!(idx.check(1, "rule-b").is_none());
    }

    // 11. Taint rule ID canonicalization
    #[test]
    fn taint_rule_id_canonicalization() {
        let src = "let x = 1; // nyx:ignore taint-unsanitised-flow\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(
            idx.check(1, "taint-unsanitised-flow (source 5:1)")
                .is_some()
        );
        assert!(idx.check(1, "taint-unsanitised-flow").is_some());
    }

    // 12a. Stacked next-line directives do NOT accumulate: each one targets
    //      the line directly after itself.
    #[test]
    fn stacked_next_line_directives_target_consecutive_lines() {
        let src = "// nyx:ignore-next-line rule-a\n// nyx:ignore-next-line rule-b\nlet x = dangerous();\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        // First ignore-next-line targets line 2, second targets line 3
        assert!(idx.check(2, "rule-a").is_some());
        assert!(idx.check(3, "rule-b").is_some());
    }

    // 12b. Multiple directives targeting the same line: a next-line directive
    //      above it plus a same-line directive on it.
    #[test]
    fn multiple_directives_same_target() {
        let src = "// nyx:ignore-next-line rule-a\nlet x = dangerous(); // nyx:ignore rule-b\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(2, "rule-a").is_some());
        assert!(idx.check(2, "rule-b").is_some());
        assert!(idx.check(2, "rule-c").is_none());
        assert!(idx.check(1, "rule-a").is_none());
    }

    // 13. Block comment with ignore-next-line
    #[test]
    fn block_comment_next_line() {
        let src = "/* nyx:ignore-next-line rule.a */\nlet x = dangerous();\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(2, "rule.a").is_some());
    }

    // 14. EOF ignore-next-line is a no-op
    #[test]
    fn eof_next_line_no_panic() {
        let src = "// nyx:ignore-next-line rule.a";
        let idx = parse_inline_suppressions(rust_path(), src);
        // Line 1 is the last line, so ignore-next-line targets line 2 which doesn't exist
        assert!(idx.check(1, "rule.a").is_none());
        assert!(idx.check(2, "rule.a").is_none());
    }

    // 15. CRLF input
    #[test]
    fn crlf_line_endings() {
        let src = "let x = 1; // nyx:ignore rule.a\r\nlet y = 2;\r\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_some());
        assert!(idx.check(2, "rule.a").is_none());
    }

    // 16. Whitespace tolerance
    #[test]
    fn whitespace_tolerance() {
        let src = "let x = 1; //   nyx:ignore   rule.a,  rule.b  \n";
        let idx = parse_inline_suppressions(rust_path(), src);
        assert!(idx.check(1, "rule.a").is_some());
        assert!(idx.check(1, "rule.b").is_some());
    }

    // 17. PHP multi-style comments
    #[test]
    fn php_multi_style() {
        let src_hash = "<?php\n$x = 1; # nyx:ignore rule.a\n";
        let src_slash = "<?php\n$x = 1; // nyx:ignore rule.b\n";
        let idx_hash = parse_inline_suppressions(php_path(), src_hash);
        let idx_slash = parse_inline_suppressions(php_path(), src_slash);
        assert!(idx_hash.check(2, "rule.a").is_some());
        assert!(idx_slash.check(2, "rule.b").is_some());
    }

    // ── canonical_rule_id tests ─────────────────────────────────────────

    #[test]
    fn canonical_strips_parenthetical() {
        assert_eq!(
            canonical_rule_id("taint-unsanitised-flow (source 5:1)"),
            "taint-unsanitised-flow"
        );
    }

    #[test]
    fn canonical_no_parenthetical_unchanged() {
        assert_eq!(canonical_rule_id("rs.quality.unwrap"), "rs.quality.unwrap");
    }

    #[test]
    fn canonical_trims_whitespace() {
        assert_eq!(canonical_rule_id("  rule.a  "), "rule.a");
    }

    // ── Ruby hash comment ───────────────────────────────────────────────

    #[test]
    fn ruby_hash_comment() {
        let src = "x = dangerous # nyx:ignore rule.a\n";
        let idx = parse_inline_suppressions(rb_path(), src);
        assert!(idx.check(1, "rule.a").is_some());
    }

    // ── JS template literal guard ───────────────────────────────────────

    #[test]
    fn js_template_literal_not_suppressed() {
        let src = "let x = `// nyx:ignore rule.a`;\n";
        let idx = parse_inline_suppressions(js_path(), src);
        assert!(idx.check(1, "rule.a").is_none());
    }

    // ── Multiline block comment ─────────────────────────────────────────

    #[test]
    fn multiline_block_comment() {
        let src = "/*\n * nyx:ignore rule.a\n */\nlet x = dangerous;\n";
        let idx = parse_inline_suppressions(rust_path(), src);
        // The directive is on line 2, same-line → targets line 2
        assert!(idx.check(2, "rule.a").is_some());
    }
}
|
||||
620
src/taint/domain.rs
Normal file
620
src/taint/domain.rs
Normal file
|
|
@ -0,0 +1,620 @@
|
|||
use crate::labels::{Cap, SourceKind};
|
||||
use crate::state::lattice::Lattice;
|
||||
use crate::state::symbol::SymbolId;
|
||||
use crate::taint::path_state::PredicateKind;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Upper bound on the number of origins recorded for one variable.
///
/// `merge_origins` stops adding entries once a list reaches this length, so
/// repeated joins cannot grow it without limit.
const MAX_ORIGINS_PER_VAR: usize = 4;
|
||||
|
||||
/// Per-variable taint information.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct VarTaint {
|
||||
pub caps: Cap,
|
||||
/// Up to N origins that contributed taint (bounded).
|
||||
pub origins: SmallVec<[TaintOrigin; 2]>,
|
||||
}
|
||||
|
||||
/// A single taint origin — the node and classification of where taint came from.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct TaintOrigin {
|
||||
pub node: NodeIndex,
|
||||
pub source_kind: SourceKind,
|
||||
}
|
||||
|
||||
/// Compact bitset for up to 64 variables (indexed by SymbolId ordinal).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct SmallBitSet(u64);
|
||||
|
||||
impl SmallBitSet {
|
||||
pub fn empty() -> Self {
|
||||
Self(0)
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, id: SymbolId) {
|
||||
let idx = id.0;
|
||||
if idx < 64 {
|
||||
self.0 |= 1u64 << idx;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains(&self, id: SymbolId) -> bool {
|
||||
let idx = id.0;
|
||||
if idx < 64 {
|
||||
self.0 & (1u64 << idx) != 0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Union: self | other
|
||||
pub fn union(self, other: Self) -> Self {
|
||||
Self(self.0 | other.0)
|
||||
}
|
||||
|
||||
/// Intersection: self & other
|
||||
pub fn intersection(self, other: Self) -> Self {
|
||||
Self(self.0 & other.0)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn is_empty(self) -> bool {
|
||||
self.0 == 0
|
||||
}
|
||||
|
||||
/// Whether self is a subset of other.
|
||||
#[allow(dead_code)] // used by Lattice::leq
|
||||
pub fn is_subset_of(self, other: Self) -> bool {
|
||||
self.0 & other.0 == self.0
|
||||
}
|
||||
|
||||
/// Whether self is a superset of other.
|
||||
#[allow(dead_code)] // used by Lattice::leq
|
||||
pub fn is_superset_of(self, other: Self) -> bool {
|
||||
other.is_subset_of(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-variable record of which whitelisted predicate kinds are known to
/// hold, and which are known not to hold, on ALL paths.
///
/// Joining two summaries keeps only the bits present in both (must-hold
/// semantics).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct PredicateSummary {
    /// Bitmask: bit 0=NullCheck, 1=EmptyCheck, 2=ErrorCheck
    pub known_true: u8,
    /// Same bit layout as `known_true`, for kinds known to be false.
    pub known_false: u8,
}

impl PredicateSummary {
    /// Summary with nothing known in either direction.
    pub fn empty() -> Self {
        Self {
            known_true: 0,
            known_false: 0,
        }
    }

    /// Join = intersection: a bit survives only if both inputs have it.
    pub fn join(self, other: Self) -> Self {
        let known_true = self.known_true & other.known_true;
        let known_false = self.known_false & other.known_false;
        Self {
            known_true,
            known_false,
        }
    }

    /// Whether some predicate kind is recorded as both true and false.
    pub fn has_contradiction(self) -> bool {
        (self.known_true & self.known_false) != 0
    }

    /// Whether nothing is known in either direction.
    pub fn is_empty(self) -> bool {
        (self.known_true | self.known_false) == 0
    }
}
|
||||
|
||||
/// Map a whitelisted PredicateKind to its bit index (0-2).
|
||||
/// Returns None for non-whitelisted kinds.
|
||||
pub fn predicate_kind_bit(kind: PredicateKind) -> Option<u8> {
|
||||
match kind {
|
||||
PredicateKind::NullCheck => Some(0),
|
||||
PredicateKind::EmptyCheck => Some(1),
|
||||
PredicateKind::ErrorCheck => Some(2),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The abstract taint state at a program point.
|
||||
///
|
||||
/// Uses sorted SmallVec keyed by SymbolId for O(n) merge-join.
|
||||
/// Variables beyond the interner's capacity are naturally excluded.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct TaintState {
|
||||
/// Per-variable taint, sorted by SymbolId.
|
||||
pub vars: SmallVec<[(SymbolId, VarTaint); 16]>,
|
||||
|
||||
/// Variables validated on ALL paths (intersection on join).
|
||||
pub validated_must: SmallBitSet,
|
||||
|
||||
/// Variables validated on ANY path (union on join).
|
||||
pub validated_may: SmallBitSet,
|
||||
|
||||
/// Per-variable predicate summary (sorted by SymbolId).
|
||||
pub predicates: SmallVec<[(SymbolId, PredicateSummary); 4]>,
|
||||
}
|
||||
|
||||
impl TaintState {
|
||||
/// Create the initial state (no taint, no validation, no predicates).
|
||||
pub fn initial() -> Self {
|
||||
Self {
|
||||
vars: SmallVec::new(),
|
||||
validated_must: SmallBitSet::empty(),
|
||||
validated_may: SmallBitSet::empty(),
|
||||
predicates: SmallVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up taint for a variable.
|
||||
pub fn get(&self, sym: SymbolId) -> Option<&VarTaint> {
|
||||
self.vars
|
||||
.binary_search_by_key(&sym, |(id, _)| *id)
|
||||
.ok()
|
||||
.map(|idx| &self.vars[idx].1)
|
||||
}
|
||||
|
||||
/// Insert or update taint for a variable.
|
||||
pub fn set(&mut self, sym: SymbolId, taint: VarTaint) {
|
||||
match self.vars.binary_search_by_key(&sym, |(id, _)| *id) {
|
||||
Ok(idx) => self.vars[idx].1 = taint,
|
||||
Err(idx) => self.vars.insert(idx, (sym, taint)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove taint for a variable.
|
||||
pub fn remove(&mut self, sym: SymbolId) {
|
||||
if let Ok(idx) = self.vars.binary_search_by_key(&sym, |(id, _)| *id) {
|
||||
self.vars.remove(idx);
|
||||
}
|
||||
}
|
||||
|
||||
/// Set a predicate summary for a variable.
|
||||
pub fn set_predicate(&mut self, sym: SymbolId, summary: PredicateSummary) {
|
||||
match self.predicates.binary_search_by_key(&sym, |(id, _)| *id) {
|
||||
Ok(idx) => self.predicates[idx].1 = summary,
|
||||
Err(idx) => self.predicates.insert(idx, (sym, summary)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get predicate summary for a variable.
|
||||
pub fn get_predicate(&self, sym: SymbolId) -> PredicateSummary {
|
||||
self.predicates
|
||||
.binary_search_by_key(&sym, |(id, _)| *id)
|
||||
.ok()
|
||||
.map(|idx| self.predicates[idx].1)
|
||||
.unwrap_or_else(PredicateSummary::empty)
|
||||
}
|
||||
|
||||
/// Check if any variable has contradictory predicates.
|
||||
pub fn has_contradiction(&self) -> bool {
|
||||
self.predicates.iter().any(|(_, s)| s.has_contradiction())
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for TaintState {
|
||||
fn bot() -> Self {
|
||||
Self::initial()
|
||||
}
|
||||
|
||||
fn join(&self, other: &Self) -> Self {
|
||||
// Merge-join vars (sorted by SymbolId)
|
||||
let vars = merge_join_vars(&self.vars, &other.vars);
|
||||
|
||||
// validated_must = intersection (must hold on ALL paths)
|
||||
let validated_must = self.validated_must.intersection(other.validated_must);
|
||||
|
||||
// validated_may = union (holds on ANY path)
|
||||
let validated_may = self.validated_may.union(other.validated_may);
|
||||
|
||||
// predicates = per-key intersection of known_true/known_false bits
|
||||
let predicates = merge_join_predicates(&self.predicates, &other.predicates);
|
||||
|
||||
TaintState {
|
||||
vars,
|
||||
validated_must,
|
||||
validated_may,
|
||||
predicates,
|
||||
}
|
||||
}
|
||||
|
||||
fn leq(&self, other: &Self) -> bool {
|
||||
// Per-key Cap subset + origins subset
|
||||
if !vars_leq(&self.vars, &other.vars) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// validated_must: self ⊇ other (superset = less info = lower)
|
||||
if !self.validated_must.is_superset_of(other.validated_must) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// validated_may: self ⊆ other
|
||||
if !self.validated_may.is_subset_of(other.validated_may) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// predicates: self.known_true ⊇ other.known_true (more precise = lower)
|
||||
predicates_leq(&self.predicates, &other.predicates)
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge-join two sorted var lists: per-key Cap OR + origins merge (bounded).
|
||||
fn merge_join_vars(
|
||||
a: &[(SymbolId, VarTaint)],
|
||||
b: &[(SymbolId, VarTaint)],
|
||||
) -> SmallVec<[(SymbolId, VarTaint); 16]> {
|
||||
let mut result = SmallVec::with_capacity(a.len().max(b.len()));
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
result.push(a[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
result.push(b[j].clone());
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
let caps = a[i].1.caps | b[j].1.caps;
|
||||
let origins = merge_origins(&a[i].1.origins, &b[j].1.origins);
|
||||
result.push((a[i].0, VarTaint { caps, origins }));
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining from either side
|
||||
while i < a.len() {
|
||||
result.push(a[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
while j < b.len() {
|
||||
result.push(b[j].clone());
|
||||
j += 1;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Merge two origin lists, deduplicating by node and bounding at MAX_ORIGINS_PER_VAR.
|
||||
fn merge_origins(
|
||||
a: &SmallVec<[TaintOrigin; 2]>,
|
||||
b: &SmallVec<[TaintOrigin; 2]>,
|
||||
) -> SmallVec<[TaintOrigin; 2]> {
|
||||
let mut merged = a.clone();
|
||||
for origin in b {
|
||||
if merged.len() >= MAX_ORIGINS_PER_VAR {
|
||||
break;
|
||||
}
|
||||
if !merged.iter().any(|o| o.node == origin.node) {
|
||||
merged.push(*origin);
|
||||
}
|
||||
}
|
||||
merged
|
||||
}
|
||||
|
||||
/// Check if a.vars ⊑ b.vars (per-key Cap subset + origins subset).
|
||||
#[allow(dead_code)] // called by Lattice::leq
|
||||
fn vars_leq(a: &[(SymbolId, VarTaint)], b: &[(SymbolId, VarTaint)]) -> bool {
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() {
|
||||
if j >= b.len() {
|
||||
return false; // a has keys not in b → not ⊑
|
||||
}
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => return false, // key in a but not b
|
||||
std::cmp::Ordering::Greater => {
|
||||
j += 1; // key only in b, skip
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
// Cap subset check
|
||||
if a[i].1.caps & b[j].1.caps != a[i].1.caps {
|
||||
return false;
|
||||
}
|
||||
// Origins subset check (by node)
|
||||
for orig in &a[i].1.origins {
|
||||
if !b[j].1.origins.iter().any(|o| o.node == orig.node) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Merge-join predicate summaries with intersection semantics.
|
||||
fn merge_join_predicates(
|
||||
a: &[(SymbolId, PredicateSummary)],
|
||||
b: &[(SymbolId, PredicateSummary)],
|
||||
) -> SmallVec<[(SymbolId, PredicateSummary); 4]> {
|
||||
let mut result = SmallVec::new();
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
// Key only in a — intersection with empty = empty → drop
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
j += 1;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
let joined = a[i].1.join(b[j].1);
|
||||
if !joined.is_empty() {
|
||||
result.push((a[i].0, joined));
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Keys only in one side → intersection with empty = drop
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Check if a.predicates ⊑ b.predicates.
|
||||
/// More precise (more known_true bits) = lower in the lattice.
|
||||
/// So a ⊑ b means a.known_true ⊇ b.known_true for each key.
|
||||
#[allow(dead_code)] // called by Lattice::leq
|
||||
fn predicates_leq(a: &[(SymbolId, PredicateSummary)], b: &[(SymbolId, PredicateSummary)]) -> bool {
|
||||
let (mut i, mut j) = (0, 0);
|
||||
|
||||
// For each key in b, a must have at least as many bits
|
||||
while j < b.len() {
|
||||
if i >= a.len() {
|
||||
// b has keys that a doesn't — a is missing info = not lower
|
||||
return false;
|
||||
}
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
// a has extra keys (more info) — OK for leq
|
||||
i += 1;
|
||||
}
|
||||
std::cmp::Ordering::Greater => {
|
||||
// b has a key that a doesn't → a has fewer bits → not ⊑
|
||||
return false;
|
||||
}
|
||||
std::cmp::Ordering::Equal => {
|
||||
// a.known_true must be a superset of b.known_true
|
||||
if a[i].1.known_true & b[j].1.known_true != b[j].1.known_true {
|
||||
return false;
|
||||
}
|
||||
if a[i].1.known_false & b[j].1.known_false != b[j].1.known_false {
|
||||
return false;
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `(symbol, taint)` entry carrying `caps` and no origins.
    fn make_taint(sym: u32, caps: Cap) -> (SymbolId, VarTaint) {
        let taint = VarTaint {
            caps,
            origins: SmallVec::new(),
        };
        (SymbolId(sym), taint)
    }

    /// Builds a `(symbol, taint)` entry with a single origin at CFG node `node`.
    fn make_taint_with_origin(sym: u32, caps: Cap, node: usize) -> (SymbolId, VarTaint) {
        let origin = TaintOrigin {
            node: NodeIndex::new(node),
            source_kind: SourceKind::Unknown,
        };
        let taint = VarTaint {
            caps,
            origins: smallvec::smallvec![origin],
        };
        (SymbolId(sym), taint)
    }

    /// Starts from `TaintState::initial()` and installs `vars` as its variable table.
    fn state_with_vars(vars: Vec<(SymbolId, VarTaint)>) -> TaintState {
        let mut state = TaintState::initial();
        state.vars = SmallVec::from_vec(vars);
        state
    }

    // ── Lattice property tests ──────────────────────────────────────────

    #[test]
    fn bot_identity() {
        let state = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
        assert_eq!(state.join(&TaintState::bot()), state);
        assert_eq!(TaintState::bot().join(&state), state);
    }

    #[test]
    fn join_commutativity() {
        let env = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
        let shell = state_with_vars(vec![make_taint(1, Cap::SHELL_ESCAPE)]);
        let forward = env.join(&shell);
        let backward = shell.join(&env);
        assert_eq!(forward, backward);
    }

    #[test]
    fn join_associativity() {
        let first = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
        let second = state_with_vars(vec![make_taint(0, Cap::SHELL_ESCAPE)]);
        let third = state_with_vars(vec![make_taint(1, Cap::HTML_ESCAPE)]);
        let left_grouped = first.join(&second).join(&third);
        let right_grouped = first.join(&second.join(&third));
        assert_eq!(left_grouped, right_grouped);
    }

    #[test]
    fn join_idempotency() {
        let state = state_with_vars(vec![make_taint(0, Cap::ENV_VAR | Cap::SHELL_ESCAPE)]);
        assert_eq!(state.join(&state), state);
    }

    #[test]
    fn leq_reflexive() {
        let state = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
        assert!(state.leq(&state));
    }

    #[test]
    fn leq_consistent_with_join() {
        let smaller = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
        let larger = state_with_vars(vec![make_taint(0, Cap::ENV_VAR | Cap::SHELL_ESCAPE)]);
        assert!(smaller.leq(&larger));
        assert_eq!(smaller.join(&larger), larger);
    }

    #[test]
    fn join_merges_caps() {
        let env = state_with_vars(vec![make_taint(0, Cap::ENV_VAR)]);
        let shell = state_with_vars(vec![make_taint(0, Cap::SHELL_ESCAPE)]);
        let merged = env.join(&shell);
        let caps = merged.get(SymbolId(0)).unwrap().caps;
        assert_eq!(caps, Cap::ENV_VAR | Cap::SHELL_ESCAPE);
    }

    #[test]
    fn join_merges_origins() {
        let from_node_1 = state_with_vars(vec![make_taint_with_origin(0, Cap::ENV_VAR, 1)]);
        let from_node_2 = state_with_vars(vec![make_taint_with_origin(0, Cap::ENV_VAR, 2)]);
        let merged = from_node_1.join(&from_node_2);
        assert_eq!(merged.get(SymbolId(0)).unwrap().origins.len(), 2);
    }

    #[test]
    fn validated_must_intersection() {
        let mut left = TaintState::initial();
        left.validated_must.insert(SymbolId(0));
        left.validated_must.insert(SymbolId(1));

        let mut right = TaintState::initial();
        right.validated_must.insert(SymbolId(1));
        right.validated_must.insert(SymbolId(2));

        // Only the symbol validated on both sides survives the join.
        let merged = left.join(&right);
        assert!(!merged.validated_must.contains(SymbolId(0)));
        assert!(merged.validated_must.contains(SymbolId(1)));
        assert!(!merged.validated_must.contains(SymbolId(2)));
    }

    #[test]
    fn validated_may_union() {
        let mut left = TaintState::initial();
        left.validated_may.insert(SymbolId(0));

        let mut right = TaintState::initial();
        right.validated_may.insert(SymbolId(1));

        // Symbols validated on either side are both present after the join.
        let merged = left.join(&right);
        assert!(merged.validated_may.contains(SymbolId(0)));
        assert!(merged.validated_may.contains(SymbolId(1)));
    }

    #[test]
    fn predicate_contradiction() {
        // The same bit is set in both masks.
        let conflicting = PredicateSummary {
            known_true: 1,  // NullCheck true
            known_false: 1, // NullCheck false
        };
        let mut state = TaintState::initial();
        state.set_predicate(SymbolId(0), conflicting);
        assert!(state.has_contradiction());
    }

    #[test]
    fn predicate_no_contradiction() {
        // Different bits in each mask.
        let compatible = PredicateSummary {
            known_true: 1,  // NullCheck true
            known_false: 2, // EmptyCheck false (different kind)
        };
        let mut state = TaintState::initial();
        state.set_predicate(SymbolId(0), compatible);
        assert!(!state.has_contradiction());
    }

    #[test]
    fn predicate_join_intersection() {
        let mut left = TaintState::initial();
        left.set_predicate(
            SymbolId(0),
            PredicateSummary {
                known_true: 0b011, // NullCheck + EmptyCheck
                known_false: 0,
            },
        );

        let mut right = TaintState::initial();
        right.set_predicate(
            SymbolId(0),
            PredicateSummary {
                known_true: 0b010, // EmptyCheck only
                known_false: 0,
            },
        );

        let merged = left.join(&right);
        // only EmptyCheck on both paths
        assert_eq!(merged.get_predicate(SymbolId(0)).known_true, 0b010);
    }

    // ── SmallBitSet tests ───────────────────────────────────────────────

    #[test]
    fn small_bitset_basic() {
        let mut bits = SmallBitSet::empty();
        assert!(bits.is_empty());

        bits.insert(SymbolId(0));
        assert!(bits.contains(SymbolId(0)));
        assert!(!bits.contains(SymbolId(1)));
        assert!(!bits.is_empty());
    }

    #[test]
    fn small_bitset_union_intersection() {
        let mut left = SmallBitSet::empty();
        left.insert(SymbolId(0));
        left.insert(SymbolId(2));

        let mut right = SmallBitSet::empty();
        right.insert(SymbolId(1));
        right.insert(SymbolId(2));

        let either = left.union(right);
        assert!(either.contains(SymbolId(0)));
        assert!(either.contains(SymbolId(1)));
        assert!(either.contains(SymbolId(2)));

        let both = left.intersection(right);
        assert!(!both.contains(SymbolId(0)));
        assert!(!both.contains(SymbolId(1)));
        assert!(both.contains(SymbolId(2)));
    }
}
|
||||
563
src/taint/mod.rs
563
src/taint/mod.rs
|
|
@ -1,11 +1,21 @@
|
|||
use crate::cfg::{Cfg, FuncSummaries, NodeInfo, StmtKind};
|
||||
pub mod domain;
|
||||
pub mod path_state;
|
||||
pub mod transfer;
|
||||
|
||||
use crate::cfg::{Cfg, FuncSummaries};
|
||||
use crate::interop::InteropEdge;
|
||||
use crate::labels::{Cap, DataLabel, SourceKind};
|
||||
use crate::labels::SourceKind;
|
||||
use crate::state::engine::{self, MAX_TRACKED_VARS};
|
||||
use crate::state::lattice::Lattice;
|
||||
use crate::state::symbol::SymbolInterner;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use domain::TaintState;
|
||||
use path_state::PredicateKind;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use std::collections::HashMap;
|
||||
use tracing::debug;
|
||||
use petgraph::visit::IntoNodeReferences;
|
||||
use std::collections::HashSet;
|
||||
use transfer::{TaintEvent, TaintTransfer};
|
||||
|
||||
/// A detected taint finding with both source and sink locations.
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
@ -20,269 +30,23 @@ pub struct Finding {
|
|||
pub path: Vec<NodeIndex>,
|
||||
/// The kind of source that originated the taint.
|
||||
pub source_kind: SourceKind,
|
||||
}
|
||||
|
||||
/// Order-independent hash of a taint map.
|
||||
///
|
||||
/// Uses XOR of per-entry hashes so the result is the same regardless of
|
||||
/// iteration order — no allocation or sorting required.
|
||||
fn taint_hash(taint: &HashMap<String, Cap>) -> u64 {
|
||||
let mut h: u64 = 0;
|
||||
for (k, bits) in taint {
|
||||
// Per-entry hash: FNV-1a-style mixing of key bytes + cap bits.
|
||||
let mut entry_h: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
|
||||
for b in k.as_bytes() {
|
||||
entry_h ^= *b as u64;
|
||||
entry_h = entry_h.wrapping_mul(0x0100_0000_01b3); // FNV prime
|
||||
}
|
||||
entry_h ^= bits.bits() as u64;
|
||||
entry_h = entry_h.wrapping_mul(0x0100_0000_01b3);
|
||||
h ^= entry_h;
|
||||
}
|
||||
h
|
||||
}
|
||||
|
||||
/// Resolved summary for a callee — a uniform view regardless of whether the
/// summary came from a local (same‑file) or global (cross‑file) source.
///
/// Produced by `resolve_callee`; consumed by `apply_taint` (return-value
/// taint) and by the sink check in the analysis loop.
struct ResolvedSummary {
    /// Caps OR-ed into the call's return value: the callee acts as a source.
    source_caps: Cap,
    /// Caps stripped from the call's return value after source and
    /// propagation bits have been combined.
    sanitizer_caps: Cap,
    /// Caps for which the call is treated as a sink; empty means "not a sink".
    sink_caps: Cap,
    /// When true, taint on the call's used variables is OR-ed into the return value.
    propagates_taint: bool,
}
|
||||
|
||||
/// Try to resolve a callee name using conservative same-language resolution.
|
||||
///
|
||||
/// Resolution order:
|
||||
/// 1. Local (same-file): exact name + same lang + same namespace
|
||||
/// 2. Global same-language: via `lookup_same_lang`; must be unambiguous
|
||||
/// 3. Interop edges: explicit cross-language bridges
|
||||
/// 4. No cross-language fallback
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn resolve_callee(
|
||||
callee: &str,
|
||||
caller_lang: Lang,
|
||||
caller_namespace: &str,
|
||||
caller_func: &str,
|
||||
call_ordinal: u32,
|
||||
local: &FuncSummaries,
|
||||
global: Option<&GlobalSummaries>,
|
||||
interop_edges: &[InteropEdge],
|
||||
) -> Option<ResolvedSummary> {
|
||||
// 1) Local (same-file): scan local summaries for matching name + lang + namespace
|
||||
let local_matches: Vec<_> = local
|
||||
.iter()
|
||||
.filter(|(k, _)| {
|
||||
k.name == callee && k.lang == caller_lang && k.namespace == caller_namespace
|
||||
})
|
||||
.collect();
|
||||
|
||||
if local_matches.len() == 1 {
|
||||
let (_, ls) = local_matches[0];
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: ls.source_caps,
|
||||
sanitizer_caps: ls.sanitizer_caps,
|
||||
sink_caps: ls.sink_caps,
|
||||
propagates_taint: ls.propagates_taint,
|
||||
});
|
||||
}
|
||||
|
||||
// Multiple local matches — try arity disambiguation (future), for now return None
|
||||
if local_matches.len() > 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// 2) Global same-language
|
||||
if let Some(gs) = global {
|
||||
let matches = gs.lookup_same_lang(caller_lang, callee);
|
||||
if matches.len() == 1 {
|
||||
let (_, fs) = matches[0];
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: fs.source_caps(),
|
||||
sanitizer_caps: fs.sanitizer_caps(),
|
||||
sink_caps: fs.sink_caps(),
|
||||
propagates_taint: fs.propagates_taint,
|
||||
});
|
||||
}
|
||||
// Multiple matches — try namespace match first
|
||||
if matches.len() > 1 {
|
||||
let same_ns: Vec<_> = matches
|
||||
.iter()
|
||||
.filter(|(k, _)| k.namespace == caller_namespace)
|
||||
.collect();
|
||||
if same_ns.len() == 1 {
|
||||
let (_, fs) = same_ns[0];
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: fs.source_caps(),
|
||||
sanitizer_caps: fs.sanitizer_caps(),
|
||||
sink_caps: fs.sink_caps(),
|
||||
propagates_taint: fs.propagates_taint,
|
||||
});
|
||||
}
|
||||
// Still ambiguous — return None (conservative)
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// 3) Interop edges: explicit cross-language bridges
|
||||
for edge in interop_edges {
|
||||
if edge.from.caller_lang == caller_lang
|
||||
&& edge.from.caller_namespace == caller_namespace
|
||||
&& edge.from.callee_symbol == callee
|
||||
&& (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_func)
|
||||
&& (edge.from.ordinal == 0 || edge.from.ordinal == call_ordinal)
|
||||
{
|
||||
// Look up the target in global summaries by exact FuncKey
|
||||
if let Some(gs) = global
|
||||
&& let Some(fs) = gs.get(&edge.to)
|
||||
{
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: fs.source_caps(),
|
||||
sanitizer_caps: fs.sanitizer_caps(),
|
||||
sink_caps: fs.sink_caps(),
|
||||
propagates_taint: fs.propagates_taint,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4) No cross-language fallback
|
||||
None
|
||||
}
|
||||
|
||||
/// Apply taint transfer for a single node, mutating `out` in place.
|
||||
///
|
||||
/// Callers should clone the taint map before calling if they need
|
||||
/// the original state preserved.
|
||||
fn apply_taint(
|
||||
node: &NodeInfo,
|
||||
out: &mut HashMap<String, Cap>,
|
||||
local_summaries: &FuncSummaries,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
caller_lang: Lang,
|
||||
caller_namespace: &str,
|
||||
interop_edges: &[InteropEdge],
|
||||
) {
|
||||
debug!(target: "taint", "Applying taint to node: {:?}", node);
|
||||
debug!(target: "taint", "Taint: {:?}", out);
|
||||
|
||||
let caller_func = node.enclosing_func.as_deref().unwrap_or("");
|
||||
|
||||
match node.label {
|
||||
// A new untrusted value enters the program
|
||||
Some(DataLabel::Source(bits)) => {
|
||||
if let Some(v) = &node.defines {
|
||||
out.insert(v.clone(), bits);
|
||||
}
|
||||
}
|
||||
// Sanitizer: propagate input taint through the assignment FIRST,
|
||||
// then strip the sanitizer's capability bits. This ensures that
|
||||
// `let y = sanitize_html(&x)` gives y the taint of x minus the
|
||||
// HTML_ESCAPE bit — rather than leaving y completely clean (which
|
||||
// would hide "wrong sanitiser for this sink" bugs).
|
||||
Some(DataLabel::Sanitizer(bits)) => {
|
||||
if let Some(v) = &node.defines {
|
||||
// 1. Propagate: union taint from all read variables
|
||||
let mut combined = Cap::empty();
|
||||
for u in &node.uses {
|
||||
if let Some(b) = out.get(u) {
|
||||
combined |= *b;
|
||||
}
|
||||
}
|
||||
// 2. Strip the sanitiser's bits
|
||||
let new = combined & !bits;
|
||||
if new.is_empty() {
|
||||
out.remove(v);
|
||||
} else {
|
||||
out.insert(v.clone(), new);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A function call — resolve against local + global summaries
|
||||
_ if node.kind == StmtKind::Call => {
|
||||
if let Some(callee) = &node.callee
|
||||
&& let Some(resolved) = resolve_callee(
|
||||
callee,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
caller_func,
|
||||
node.call_ordinal,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
interop_edges,
|
||||
)
|
||||
{
|
||||
// Build the return value's taint bits in stages, then
|
||||
// write once at the end. Order matters:
|
||||
//
|
||||
// 1. Start with fresh source taint (if the callee is a source)
|
||||
// 2. Union with propagated arg taint (if the callee propagates)
|
||||
// 3. Strip sanitizer bits last (so sanitization always wins)
|
||||
|
||||
let mut return_bits = Cap::empty();
|
||||
|
||||
// ── 1. Source behaviour ──
|
||||
return_bits |= resolved.source_caps;
|
||||
|
||||
// ── 2. Propagation ──
|
||||
if resolved.propagates_taint {
|
||||
for u in &node.uses {
|
||||
if let Some(bits) = out.get(u) {
|
||||
return_bits |= *bits;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── 3. Sanitizer behaviour (applied last so it always wins) ──
|
||||
return_bits &= !resolved.sanitizer_caps;
|
||||
|
||||
// ── Write the result ──
|
||||
if let Some(v) = &node.defines {
|
||||
if return_bits.is_empty() {
|
||||
out.remove(v);
|
||||
} else {
|
||||
out.insert(v.clone(), return_bits);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Sink behaviour: handled in the main analysis loop
|
||||
// (checked via node.label or resolved summary) ──
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Unresolved call — fall through to default gen/kill below
|
||||
}
|
||||
|
||||
// All other statements: classic gen/kill for assignments
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Default gen/kill: propagate taint through variable assignments
|
||||
if !matches!(
|
||||
node.label,
|
||||
Some(DataLabel::Source(_)) | Some(DataLabel::Sanitizer(_))
|
||||
) && let Some(d) = &node.defines
|
||||
{
|
||||
let mut combined = Cap::empty();
|
||||
for u in &node.uses {
|
||||
if let Some(bits) = out.get(u) {
|
||||
combined |= *bits;
|
||||
}
|
||||
}
|
||||
if combined.is_empty() {
|
||||
out.remove(d);
|
||||
} else {
|
||||
out.insert(d.clone(), combined);
|
||||
}
|
||||
}
|
||||
/// Whether all tainted sink variables are guarded by a validation
|
||||
/// predicate on this path (metadata only — does not change severity).
|
||||
#[allow(dead_code)] // surfaced in Diag output (task 4)
|
||||
pub path_validated: bool,
|
||||
/// The kind of validation guard protecting this path, if any.
|
||||
#[allow(dead_code)] // surfaced in Diag output (task 4)
|
||||
pub guard_kind: Option<PredicateKind>,
|
||||
}
|
||||
|
||||
/// Run taint analysis on a single file's CFG.
|
||||
///
|
||||
/// `global_summaries` is `None` for pass‑1 / single‑file mode and
|
||||
/// `Some(&map)` for pass‑2 cross‑file analysis.
|
||||
/// Uses a monotone forward dataflow analysis via `state::engine::run_forward`
|
||||
/// with the `TaintTransfer` function. Termination is guaranteed by lattice
|
||||
/// finiteness (bounded `Cap` bits × bounded variable count).
|
||||
///
|
||||
/// For JS/TS files: uses a two-level solve to prevent cross-function taint
|
||||
/// leakage while preserving global-to-function flows.
|
||||
pub fn analyse_file(
|
||||
cfg: &Cfg,
|
||||
entry: NodeIndex,
|
||||
|
|
@ -292,162 +56,155 @@ pub fn analyse_file(
|
|||
caller_namespace: &str,
|
||||
interop_edges: &[InteropEdge],
|
||||
) -> Vec<Finding> {
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
let _span = tracing::debug_span!("taint_analyse_file").entered();
|
||||
|
||||
/// Queue item: current CFG node + taint map that holds here
|
||||
#[derive(Clone)]
|
||||
struct Item {
|
||||
node: NodeIndex,
|
||||
taint: HashMap<String, Cap>,
|
||||
// 1. Build symbol interner from CFG
|
||||
let interner = SymbolInterner::from_cfg(cfg);
|
||||
|
||||
if interner.len() > MAX_TRACKED_VARS {
|
||||
tracing::warn!(
|
||||
symbols = interner.len(),
|
||||
max = MAX_TRACKED_VARS,
|
||||
"taint analysis: too many variables, some will be ignored"
|
||||
);
|
||||
}
|
||||
|
||||
// (node, taint_hash) → predecessor key (for path rebuild)
|
||||
type Key = (NodeIndex, u64);
|
||||
let mut pred: HashMap<Key, Key> = HashMap::new();
|
||||
// 2. Build base transfer function
|
||||
let base_transfer = TaintTransfer {
|
||||
lang: caller_lang,
|
||||
namespace: caller_namespace,
|
||||
interner: &interner, // also used for events_to_findings below
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
interop_edges,
|
||||
global_seed: None,
|
||||
scope_filter: None,
|
||||
};
|
||||
|
||||
// Seen states so we do not revisit them infinitely
|
||||
let mut seen: HashSet<Key> = HashSet::new();
|
||||
// 3. Run analysis (two-level for JS/TS, single-pass otherwise)
|
||||
let events = if matches!(caller_lang, Lang::JavaScript | Lang::TypeScript) {
|
||||
analyse_js_two_level(cfg, entry, &interner, &base_transfer)
|
||||
} else {
|
||||
let result = engine::run_forward(cfg, entry, &base_transfer, TaintState::initial());
|
||||
result.events
|
||||
};
|
||||
|
||||
// Resulting findings: (sink_node, source_node, full_path)
|
||||
let mut findings: Vec<Finding> = Vec::new();
|
||||
// 4. Convert events to findings
|
||||
let mut findings = events_to_findings(&events, &interner);
|
||||
|
||||
let mut q = VecDeque::new();
|
||||
q.push_back(Item {
|
||||
node: entry,
|
||||
taint: HashMap::new(),
|
||||
});
|
||||
seen.insert((entry, 0));
|
||||
// 5. Deduplicate findings by (sink, source), prefer path_validated=true
|
||||
findings.sort_by_key(|f| (f.sink.index(), f.source.index(), !f.path_validated));
|
||||
findings.dedup_by_key(|f| (f.sink, f.source));
|
||||
|
||||
while let Some(Item { node, taint }) = q.pop_front() {
|
||||
let caller_func = cfg[node].enclosing_func.as_deref().unwrap_or("");
|
||||
let mut out = taint.clone();
|
||||
apply_taint(
|
||||
&cfg[node],
|
||||
&mut out,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
interop_edges,
|
||||
);
|
||||
findings
|
||||
}
|
||||
|
||||
// ── Sink check ──────────────────────────────────────────────────
|
||||
// Two ways a node can be a sink:
|
||||
// 1. Its AST label says Sink (existing inline labels)
|
||||
// 2. Its callee resolves to a function with sink_caps (cross-file)
|
||||
let sink_caps = match cfg[node].label {
|
||||
Some(DataLabel::Sink(caps)) => caps,
|
||||
_ => {
|
||||
// check if callee resolves to a sink
|
||||
cfg[node]
|
||||
.callee
|
||||
.as_ref()
|
||||
.and_then(|c| {
|
||||
resolve_callee(
|
||||
c,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
caller_func,
|
||||
cfg[node].call_ordinal,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
interop_edges,
|
||||
)
|
||||
})
|
||||
.filter(|r| !r.sink_caps.is_empty())
|
||||
.map(|r| r.sink_caps)
|
||||
.unwrap_or(Cap::empty())
|
||||
}
|
||||
/// JS/TS two-level solve to prevent cross-function taint leakage.
|
||||
///
|
||||
/// Level 1: Solve top-level code (nodes where `enclosing_func.is_none()`).
|
||||
/// Level 2: For each function, solve seeded with top-level taint.
|
||||
fn analyse_js_two_level(
|
||||
cfg: &Cfg,
|
||||
entry: NodeIndex,
|
||||
_interner: &SymbolInterner,
|
||||
base_transfer: &TaintTransfer,
|
||||
) -> Vec<TaintEvent> {
|
||||
// Level 1: solve top-level only
|
||||
let toplevel_transfer = TaintTransfer {
|
||||
lang: base_transfer.lang,
|
||||
namespace: base_transfer.namespace,
|
||||
interner: base_transfer.interner,
|
||||
local_summaries: base_transfer.local_summaries,
|
||||
global_summaries: base_transfer.global_summaries,
|
||||
interop_edges: base_transfer.interop_edges,
|
||||
global_seed: None,
|
||||
scope_filter: Some(None), // top-level only (enclosing_func == None)
|
||||
};
|
||||
|
||||
let toplevel_result =
|
||||
engine::run_forward(cfg, entry, &toplevel_transfer, TaintState::initial());
|
||||
|
||||
// Extract top-level taint state at the last converged point
|
||||
let toplevel_state = extract_exit_state(&toplevel_result.states);
|
||||
|
||||
// Level 2: solve each function seeded with top-level state
|
||||
let mut all_events = toplevel_result.events;
|
||||
|
||||
let func_entries = find_function_entries(cfg);
|
||||
for (func_name, func_entry) in &func_entries {
|
||||
let func_transfer = TaintTransfer {
|
||||
lang: base_transfer.lang,
|
||||
namespace: base_transfer.namespace,
|
||||
interner: base_transfer.interner,
|
||||
local_summaries: base_transfer.local_summaries,
|
||||
global_summaries: base_transfer.global_summaries,
|
||||
interop_edges: base_transfer.interop_edges,
|
||||
global_seed: Some(&toplevel_state),
|
||||
scope_filter: Some(Some(func_name.as_str())),
|
||||
};
|
||||
|
||||
if !sink_caps.is_empty() {
|
||||
let bad = cfg[node]
|
||||
.uses
|
||||
.iter()
|
||||
.any(|u| out.get(u).is_some_and(|b| (*b & sink_caps) != Cap::empty()));
|
||||
if bad {
|
||||
// Reconstruct path backwards from sink to source.
|
||||
//
|
||||
// A node is considered a "source" if:
|
||||
// 1. It has an inline DataLabel::Source (same-file), OR
|
||||
// 2. It is a Call whose callee resolves to a source via
|
||||
// local or global summaries (cross-file).
|
||||
let sink_node = node;
|
||||
let mut path = vec![node];
|
||||
let mut source_node = node; // fallback: sink itself
|
||||
let mut key = (node, taint_hash(&taint));
|
||||
let func_result =
|
||||
engine::run_forward(cfg, *func_entry, &func_transfer, TaintState::initial());
|
||||
all_events.extend(func_result.events);
|
||||
}
|
||||
|
||||
while let Some(&(prev, prev_hash)) = pred.get(&key) {
|
||||
path.push(prev);
|
||||
all_events
|
||||
}
|
||||
|
||||
// Check inline source label
|
||||
if matches!(cfg[prev].label, Some(DataLabel::Source(_))) {
|
||||
source_node = prev;
|
||||
break;
|
||||
}
|
||||
/// Extract the "best" taint state from converged states (join all exit/reachable states).
|
||||
fn extract_exit_state(states: &std::collections::HashMap<NodeIndex, TaintState>) -> TaintState {
|
||||
let mut result = TaintState::initial();
|
||||
for state in states.values() {
|
||||
result = result.join(state);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
// Check cross-file source via resolved callee summary
|
||||
let prev_caller_func = cfg[prev].enclosing_func.as_deref().unwrap_or("");
|
||||
if cfg[prev].kind == StmtKind::Call
|
||||
&& let Some(callee) = &cfg[prev].callee
|
||||
&& let Some(resolved) = resolve_callee(
|
||||
callee,
|
||||
caller_lang,
|
||||
caller_namespace,
|
||||
prev_caller_func,
|
||||
cfg[prev].call_ordinal,
|
||||
local_summaries,
|
||||
global_summaries,
|
||||
interop_edges,
|
||||
)
|
||||
&& !resolved.source_caps.is_empty()
|
||||
{
|
||||
source_node = prev;
|
||||
break;
|
||||
}
|
||||
/// Find function entry nodes: (func_name, entry_node) pairs.
|
||||
///
|
||||
/// A function entry is the first node with a given `enclosing_func` value.
|
||||
fn find_function_entries(cfg: &Cfg) -> Vec<(String, NodeIndex)> {
|
||||
let mut seen = HashSet::new();
|
||||
let mut entries = Vec::new();
|
||||
|
||||
key = (prev, prev_hash);
|
||||
}
|
||||
|
||||
path.reverse();
|
||||
|
||||
// Infer the source kind from the source node's label and callee
|
||||
let source_kind = match cfg[source_node].label {
|
||||
Some(DataLabel::Source(caps)) => {
|
||||
let callee = cfg[source_node].callee.as_deref().unwrap_or("");
|
||||
crate::labels::infer_source_kind(caps, callee)
|
||||
}
|
||||
_ => SourceKind::Unknown,
|
||||
};
|
||||
|
||||
findings.push(Finding {
|
||||
sink: sink_node,
|
||||
source: source_node,
|
||||
path,
|
||||
source_kind,
|
||||
});
|
||||
}
|
||||
for (idx, info) in cfg.node_references() {
|
||||
if let Some(ref func_name) = info.enclosing_func
|
||||
&& seen.insert(func_name.clone())
|
||||
{
|
||||
entries.push((func_name.clone(), idx));
|
||||
}
|
||||
}
|
||||
|
||||
// enqueue successors — cache hashes to avoid recomputation
|
||||
let out_h = taint_hash(&out);
|
||||
let in_h = taint_hash(&taint);
|
||||
let succs: Vec<_> = cfg.neighbors(node).collect();
|
||||
for (i, succ) in succs.iter().enumerate() {
|
||||
let key = (*succ, out_h);
|
||||
if !seen.contains(&key) {
|
||||
seen.insert(key);
|
||||
pred.insert(key, (node, in_h));
|
||||
// Move the map into the last successor to avoid a clone
|
||||
let taint_for_succ = if i + 1 == succs.len() {
|
||||
std::mem::take(&mut out)
|
||||
} else {
|
||||
out.clone()
|
||||
};
|
||||
q.push_back(Item {
|
||||
node: *succ,
|
||||
taint: taint_for_succ,
|
||||
});
|
||||
entries
|
||||
}
|
||||
|
||||
/// Convert TaintEvents into Findings.
|
||||
fn events_to_findings(events: &[TaintEvent], _interner: &SymbolInterner) -> Vec<Finding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for event in events {
|
||||
let TaintEvent::SinkReached {
|
||||
sink_node,
|
||||
tainted_vars,
|
||||
all_validated,
|
||||
guard_kind,
|
||||
..
|
||||
} = event;
|
||||
|
||||
// Collect unique origins across all tainted vars at this sink
|
||||
let mut seen_origins: HashSet<(usize, usize)> = HashSet::new();
|
||||
for (_sym, _caps, origins) in tainted_vars {
|
||||
for origin in origins {
|
||||
if seen_origins.insert((origin.node.index(), sink_node.index())) {
|
||||
findings.push(Finding {
|
||||
sink: *sink_node,
|
||||
source: origin.node,
|
||||
path: vec![origin.node, *sink_node],
|
||||
source_kind: origin.source_kind,
|
||||
path_validated: *all_validated,
|
||||
guard_kind: *guard_kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
234
src/taint/path_state.rs
Normal file
234
src/taint/path_state.rs
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
// ─── PredicateKind ───────────────────────────────────────────────────────────
|
||||
|
||||
/// Classification of what an if-condition tests.
|
||||
///
|
||||
/// Determined by heuristic analysis of the raw condition text.
|
||||
/// Classification is conservative: prefer [`Unknown`](PredicateKind::Unknown)
|
||||
/// over a wrong guess.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum PredicateKind {
|
||||
/// `x.is_none()`, `x == null`, `x == nil`, `x is None`
|
||||
NullCheck,
|
||||
/// `x.is_empty()`, `x.len() == 0`, `x == ""`
|
||||
EmptyCheck,
|
||||
/// `x.is_err()`, `x.is_ok()`, `err != nil`
|
||||
ErrorCheck,
|
||||
/// Call to a validation/guard function: `validate(x)`, `is_safe(x)`
|
||||
ValidationCall,
|
||||
/// Call to a sanitizer function: `sanitize(x)`, `escape(x)`
|
||||
SanitizerCall,
|
||||
/// Comparison operators: `x == 5`, `x > threshold`
|
||||
Comparison,
|
||||
/// Generic boolean test — cannot classify further.
|
||||
Unknown,
|
||||
}
|
||||
|
||||
/// Classify a raw condition text into a [`PredicateKind`].
|
||||
///
|
||||
/// # Rules
|
||||
///
|
||||
/// - Empty/None text → [`Unknown`](PredicateKind::Unknown).
|
||||
/// - `ValidationCall` / `SanitizerCall` require a `(` in the text **and** a
|
||||
/// matching callee token. This avoids misclassifying comparisons like
|
||||
/// `x_valid == true`.
|
||||
/// - Prefers [`Unknown`](PredicateKind::Unknown) over false positives.
|
||||
pub fn classify_condition(text: &str) -> PredicateKind {
|
||||
if text.is_empty() {
|
||||
return PredicateKind::Unknown;
|
||||
}
|
||||
|
||||
let lower = text.to_ascii_lowercase();
|
||||
|
||||
// ── Error checks (before null checks: `err != nil` is an error check,
|
||||
// not a null check, even though it contains `!= nil`) ──────────────
|
||||
if lower.contains("is_err")
|
||||
|| lower.contains("is_ok")
|
||||
|| lower.contains("err != nil")
|
||||
|| lower.contains("err == nil")
|
||||
|| lower.contains("error != nil")
|
||||
|| lower.contains("error == nil")
|
||||
{
|
||||
return PredicateKind::ErrorCheck;
|
||||
}
|
||||
|
||||
// ── Null checks ──────────────────────────────────────────────────────
|
||||
if lower.contains("is_none")
|
||||
|| lower.contains("is_some")
|
||||
|| lower.contains("== none")
|
||||
|| lower.contains("!= none")
|
||||
|| lower.contains("is none")
|
||||
|| lower.contains("is not none")
|
||||
|| lower.contains("== null")
|
||||
|| lower.contains("!= null")
|
||||
|| lower.contains("=== null")
|
||||
|| lower.contains("!== null")
|
||||
|| lower.contains("== nil")
|
||||
|| lower.contains("!= nil")
|
||||
{
|
||||
return PredicateKind::NullCheck;
|
||||
}
|
||||
|
||||
// ── Empty checks ─────────────────────────────────────────────────────
|
||||
if lower.contains("is_empty")
|
||||
|| lower.contains(".len() == 0")
|
||||
|| lower.contains(".len() != 0")
|
||||
|| lower.contains(".length == 0")
|
||||
|| lower.contains(".length === 0")
|
||||
|| lower.contains(".length != 0")
|
||||
|| lower.contains(".length !== 0")
|
||||
|| lower.contains("== \"\"")
|
||||
|| lower.contains("== ''")
|
||||
{
|
||||
return PredicateKind::EmptyCheck;
|
||||
}
|
||||
|
||||
// ── Call-based kinds (require `(` to be present) ─────────────────────
|
||||
if lower.contains('(') {
|
||||
// Extract a rough callee token: everything before the first `(`
|
||||
// that looks like an identifier (letters, digits, underscores, dots).
|
||||
let callee_part = lower.split('(').next().unwrap_or("");
|
||||
// Take the last segment (after `.` or `::`) as the bare name.
|
||||
let bare = callee_part
|
||||
.rsplit(['.', ':'])
|
||||
.next()
|
||||
.unwrap_or(callee_part)
|
||||
.trim();
|
||||
|
||||
// Validation
|
||||
if bare.contains("valid")
|
||||
|| bare.contains("check")
|
||||
|| bare.contains("verify")
|
||||
|| bare.starts_with("is_safe")
|
||||
|| bare.starts_with("is_authorized")
|
||||
|| bare.starts_with("is_authenticated")
|
||||
{
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
|
||||
// Sanitizer
|
||||
if bare.contains("sanitiz") || bare.contains("escape") || bare.contains("encode") {
|
||||
return PredicateKind::SanitizerCall;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Comparison operators ─────────────────────────────────────────────
|
||||
if lower.contains("==")
|
||||
|| lower.contains("!=")
|
||||
|| lower.contains(">=")
|
||||
|| lower.contains("<=")
|
||||
|| lower.contains(" > ")
|
||||
|| lower.contains(" < ")
|
||||
{
|
||||
return PredicateKind::Comparison;
|
||||
}
|
||||
|
||||
PredicateKind::Unknown
|
||||
}
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every condition in `conditions` is classified as `expected`.
    fn assert_all_classify_as(conditions: &[&str], expected: PredicateKind) {
        for &condition in conditions {
            assert_eq!(
                classify_condition(condition),
                expected,
                "unexpected classification for {condition:?}"
            );
        }
    }

    // ── classify_condition ────────────────────────────────────────────────

    #[test]
    fn classify_empty_is_unknown() {
        assert_all_classify_as(&[""], PredicateKind::Unknown);
    }

    #[test]
    fn classify_null_checks() {
        assert_all_classify_as(
            &[
                "x.is_none()",
                "x == null",
                "x != nil",
                "x is None",
                "x === null",
            ],
            PredicateKind::NullCheck,
        );
    }

    #[test]
    fn classify_error_checks() {
        assert_all_classify_as(
            &["x.is_err()", "err != nil", "x.is_ok()"],
            PredicateKind::ErrorCheck,
        );
    }

    #[test]
    fn classify_empty_checks() {
        assert_all_classify_as(
            &["x.is_empty()", "x.len() == 0", "x.length === 0"],
            PredicateKind::EmptyCheck,
        );
    }

    #[test]
    fn classify_validation_call() {
        assert_all_classify_as(
            &[
                "validate(x)",
                "is_safe(input)",
                "check_auth(req)",
                "input.verify(sig)",
            ],
            PredicateKind::ValidationCall,
        );
    }

    #[test]
    fn classify_validation_requires_paren() {
        // Without call syntax (`(`) a validation-looking name must not be
        // treated as a ValidationCall.
        assert_all_classify_as(&["x_valid == true"], PredicateKind::Comparison);
        assert_all_classify_as(&["is_valid && ready"], PredicateKind::Unknown);
    }

    #[test]
    fn classify_sanitizer_call() {
        assert_all_classify_as(
            &["sanitize(x)", "html_escape(s)", "url_encode(path)"],
            PredicateKind::SanitizerCall,
        );
    }

    #[test]
    fn classify_comparison() {
        assert_all_classify_as(&["x == 5", "x != y", "a >= b"], PredicateKind::Comparison);
    }

    #[test]
    fn classify_unknown_fallback() {
        assert_all_classify_as(&["flag", "a && b"], PredicateKind::Unknown);
    }
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
use super::*;
|
||||
use crate::cfg::FuncSummaries;
|
||||
use crate::interop::InteropEdge;
|
||||
use crate::labels::Cap;
|
||||
use crate::symbol::FuncKey;
|
||||
|
||||
#[test]
|
||||
|
|
@ -52,8 +53,10 @@ fn taint_through_if_else() {
|
|||
let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);
|
||||
|
||||
// exactly one path (via the True branch) should be flagged
|
||||
assert_eq!(findings.len(), 1);
|
||||
// Both branches have findings: the true branch uses unsanitized `x`,
|
||||
// the else branch uses `safe` which was sanitized with HTML_ESCAPE
|
||||
// but the sink requires SHELL_ESCAPE (wrong sanitizer → still tainted).
|
||||
assert_eq!(findings.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -2218,3 +2221,318 @@ fn return_call_recognized_as_source() {
|
|||
"foo() should have source_caps set because env::var is called inside return"
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Path-sensitive analysis tests ───────────────────────────────────────────
|
||||
|
||||
#[test]
fn validate_and_early_return() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Guard-and-return shape: when validation fails the function returns
    // early, so the sink below the guard is only reached on the path where
    // `!validate(&x)` was false (i.e. validation passed).
    //
    // Per the CFG design, the false path is a synthetic pass-through node
    // reached via an explicit False edge from the If node:
    //   cond → (False) → pass-through → (Seq) → sink
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if !validate(&x) { return; }
            Command::new("sh").arg(x).status().unwrap();
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    // Validation does not kill taint, so the flow is still reported — but the
    // finding must carry the validation annotation from the guard.
    assert_eq!(findings.len(), 1, "should still detect the taint flow");
    let finding = &findings[0];
    assert!(
        finding.path_validated,
        "finding should be marked as path_validated (early-return guard detected)"
    );
    assert_eq!(
        finding.guard_kind,
        Some(PredicateKind::ValidationCall),
        "guard_kind should be ValidationCall"
    );
}
|
||||
|
||||
#[test]
fn validate_in_if_else_path_validated() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // The sink lives in the True branch of `if validate(&x)`, i.e. on the
    // path where validation passed; the else branch only prints.
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if validate(&x) {
                Command::new("sh").arg(&x).status().unwrap();
            } else {
                println!("invalid input");
            }
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    assert_eq!(findings.len(), 1, "should detect the taint flow");
    let finding = &findings[0];
    assert!(
        finding.path_validated,
        "finding should be path_validated (sink in validated branch)"
    );
    assert_eq!(
        finding.guard_kind,
        Some(PredicateKind::ValidationCall),
        "guard_kind should be ValidationCall"
    );
}
|
||||
|
||||
#[test]
fn sink_on_failed_validation_branch() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // The sink sits inside `if !validate(&x) { .. }`, i.e. on the path where
    // validation FAILED, so the finding must not be annotated as validated.
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if !validate(&x) {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    assert_eq!(findings.len(), 1, "should detect taint flow to sink");
    let finding = &findings[0];
    assert!(
        !finding.path_validated,
        "finding should NOT be path_validated (sink is in failed-validation branch)"
    );
}
|
||||
|
||||
#[test]
fn contradictory_null_check_pruned() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // After the early return on `x.is_none()`, the fall-through path carries
    // the NullCheck predicate with polarity=false. The True branch of the
    // second `if x.is_none()` would need polarity=true — a contradiction, so
    // that branch is infeasible.
    //
    // NOTE(review): the sink argument is the literal "dangerous" (untainted),
    // so this assertion would presumably also hold without any pruning;
    // consider a tainted argument if pruning itself should be exercised.
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").ok();
            if x.is_none() { return; }
            if x.is_none() {
                Command::new("sh").arg("dangerous").status().unwrap();
            }
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    let finding_count = findings.len();
    assert!(
        findings.is_empty(),
        "inner branch is infeasible — should produce no findings (got {})",
        finding_count
    );
}
|
||||
|
||||
#[test]
fn sanitize_one_branch_no_regression() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Same program as `taint_through_if_else`: one branch passes the raw
    // value to the sink, the other passes an HTML-escaped copy.
    //
    // Both branches are expected to be reported (2 findings): the true branch
    // uses unsanitized `x`, and the else branch uses `safe`, which was
    // sanitized with HTML_ESCAPE while the sink requires SHELL_ESCAPE.
    // The `// SAFE` marker inside the fixture is part of the analysed source
    // text and is left untouched; it does not reflect the expected result.
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let safe = html_escape::encode_safe(&x);

            if x.len() > 5 {
                Command::new("sh").arg(&x).status().unwrap(); // UNSAFE
            } else {
                Command::new("sh").arg(&safe).status().unwrap(); // SAFE
            }
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    // Previously only 1 finding was produced because the else_clause was
    // silently dropped from the CFG.
    assert_eq!(
        findings.len(),
        2,
        "two findings expected (both branches reach sink with wrong/no sanitizer)"
    );
}
|
||||
|
||||
#[test]
fn path_state_budget_graceful() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Nine nested `if` guards with the sink at the innermost level. Per the
    // original intent, PathState should truncate gracefully after
    // MAX_PATH_PREDICATES instead of losing the flow.
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if x.len() > 1 {
                if x.len() > 2 {
                    if x.len() > 3 {
                        if x.len() > 4 {
                            if x.len() > 5 {
                                if x.len() > 6 {
                                    if x.len() > 7 {
                                        if x.len() > 8 {
                                            if x.len() > 9 {
                                                Command::new("sh").arg(&x).status().unwrap();
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    // Truncation must not turn into a false negative.
    assert_eq!(
        findings.len(),
        1,
        "should detect taint flow even with truncated PathState"
    );
}
|
||||
|
||||
#[test]
fn unknown_predicate_not_pruned() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // `x.len() > 5` is a Comparison predicate. Comparison is not in the
    // contradiction whitelist, so repeating the same guard after an early
    // return must NOT prune the second branch, even though it looks
    // contradictory.
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if x.len() > 5 { return; }
            if x.len() > 5 {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    assert_eq!(
        findings.len(),
        1,
        "Comparison predicate should not cause contradiction pruning"
    );
}
|
||||
|
||||
#[test]
fn multi_var_predicate_not_pruned() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Intent: conditions that reference more than one identifier must never
    // be pruned for contradiction, even when their kind is whitelisted.
    //
    // NOTE(review): the guard `y.is_none()` names a single variable. It only
    // counts as multi-variable if identifier collection yields both `y` and
    // the method name `is_none` for this condition — confirm that this is
    // what the test relies on.
    let src: &[u8] = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            let y = env::var("OTHER").ok();
            if y.is_none() { return; }
            if y.is_none() {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let rust = Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust).unwrap();
    let tree = parser.parse(src, None).unwrap();

    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
    let findings = analyse_file(&cfg, entry, &summaries, None, Lang::Rust, "test.rs", &[]);

    // If the inner branch is not pruned, tainted `x` reaches the sink.
    assert!(
        !findings.is_empty(),
        "multi-var predicate should not be pruned; flow should be detected"
    );
}
|
||||
|
|
|
|||
458
src/taint/transfer.rs
Normal file
458
src/taint/transfer.rs
Normal file
|
|
@ -0,0 +1,458 @@
|
|||
use crate::callgraph::normalize_callee_name;
|
||||
use crate::cfg::{EdgeKind, FuncSummaries, NodeInfo, StmtKind};
|
||||
use crate::interop::InteropEdge;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::state::engine::Transfer;
|
||||
use crate::state::lattice::Lattice;
|
||||
use crate::state::symbol::{SymbolId, SymbolInterner};
|
||||
use crate::summary::{CalleeResolution, GlobalSummaries};
|
||||
use crate::symbol::Lang;
|
||||
use crate::taint::domain::{TaintOrigin, TaintState, VarTaint, predicate_kind_bit};
|
||||
use crate::taint::path_state::{PredicateKind, classify_condition};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Events emitted by the taint transfer function during Phase 2.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum TaintEvent {
|
||||
SinkReached {
|
||||
sink_node: NodeIndex,
|
||||
tainted_vars: Vec<(SymbolId, Cap, SmallVec<[TaintOrigin; 2]>)>,
|
||||
#[allow(dead_code)]
|
||||
sink_caps: Cap,
|
||||
all_validated: bool,
|
||||
guard_kind: Option<PredicateKind>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Taint transfer function for forward dataflow analysis.
|
||||
pub struct TaintTransfer<'a> {
|
||||
pub lang: Lang,
|
||||
pub namespace: &'a str,
|
||||
pub interner: &'a SymbolInterner,
|
||||
pub local_summaries: &'a FuncSummaries,
|
||||
pub global_summaries: Option<&'a GlobalSummaries>,
|
||||
pub interop_edges: &'a [InteropEdge],
|
||||
/// For JS two-level solve: top-level taint state seeded into function solves.
|
||||
pub global_seed: Option<&'a TaintState>,
|
||||
/// Optional scope filter: if set, only process nodes whose enclosing_func matches.
|
||||
/// None = process all nodes. Some(None) = top-level only. Some(Some(name)) = function only.
|
||||
pub scope_filter: Option<Option<&'a str>>,
|
||||
}
|
||||
|
||||
impl Transfer<TaintState> for TaintTransfer<'_> {
|
||||
type Event = TaintEvent;
|
||||
|
||||
fn apply(
|
||||
&self,
|
||||
node: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
edge: Option<EdgeKind>,
|
||||
mut state: TaintState,
|
||||
) -> (TaintState, Vec<TaintEvent>) {
|
||||
let mut events = Vec::new();
|
||||
|
||||
// Scope filter: skip nodes outside our scope (return state unchanged)
|
||||
if let Some(ref filter) = self.scope_filter {
|
||||
let node_func = info.enclosing_func.as_deref();
|
||||
if node_func != *filter {
|
||||
return (state, events);
|
||||
}
|
||||
}
|
||||
|
||||
let caller_func = info.enclosing_func.as_deref().unwrap_or("");
|
||||
|
||||
// ── Apply taint transfer ────────────────────────────────────────
|
||||
match info.label {
|
||||
Some(DataLabel::Source(bits)) => {
|
||||
self.apply_source(node, info, bits, &mut state);
|
||||
}
|
||||
Some(DataLabel::Sanitizer(bits)) => {
|
||||
self.apply_sanitizer(info, bits, &mut state);
|
||||
}
|
||||
_ if info.kind == StmtKind::Call => {
|
||||
self.apply_call(node, info, caller_func, &mut state);
|
||||
}
|
||||
_ => {
|
||||
self.apply_assignment(info, &mut state);
|
||||
}
|
||||
}
|
||||
|
||||
// ── If-node predicate handling (edge-aware) ─────────────────────
|
||||
if info.kind == StmtKind::If
|
||||
&& !info.condition_vars.is_empty()
|
||||
&& matches!(edge, Some(EdgeKind::True) | Some(EdgeKind::False))
|
||||
{
|
||||
let cond_text = info.condition_text.as_deref().unwrap_or("");
|
||||
let kind = classify_condition(cond_text);
|
||||
let polarity = matches!(edge, Some(EdgeKind::True)) ^ info.condition_negated;
|
||||
|
||||
// ValidationCall handling
|
||||
if kind == PredicateKind::ValidationCall && polarity {
|
||||
for var in &info.condition_vars {
|
||||
if let Some(sym) = self.interner.get(var) {
|
||||
state.validated_may.insert(sym);
|
||||
state.validated_must.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Predicate summary for whitelisted kinds (contradiction pruning)
|
||||
if let Some(bit_idx) = predicate_kind_bit(kind) {
|
||||
for var in &info.condition_vars {
|
||||
if let Some(sym) = self.interner.get(var) {
|
||||
let mut summary = state.get_predicate(sym);
|
||||
if polarity {
|
||||
summary.known_true |= 1 << bit_idx;
|
||||
} else {
|
||||
summary.known_false |= 1 << bit_idx;
|
||||
}
|
||||
state.set_predicate(sym, summary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Contradiction pruning: if any variable has contradictory predicates,
|
||||
// this is an infeasible path → return bot (monotonically kills branch).
|
||||
if state.has_contradiction() {
|
||||
return (TaintState::bot(), events);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Sink check ──────────────────────────────────────────────────
|
||||
let sink_caps = self.resolve_sink_caps(info, caller_func);
|
||||
if !sink_caps.is_empty() {
|
||||
let tainted_vars = self.collect_tainted_sink_vars(info, &state, sink_caps);
|
||||
if !tainted_vars.is_empty() {
|
||||
let all_validated = tainted_vars
|
||||
.iter()
|
||||
.all(|(sym, _, _)| state.validated_may.contains(*sym));
|
||||
|
||||
let guard_kind = if all_validated {
|
||||
Some(PredicateKind::ValidationCall)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
events.push(TaintEvent::SinkReached {
|
||||
sink_node: node,
|
||||
tainted_vars,
|
||||
sink_caps,
|
||||
all_validated,
|
||||
guard_kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
(state, events)
|
||||
}
|
||||
|
||||
fn iteration_budget(&self) -> usize {
|
||||
100_000
|
||||
}
|
||||
|
||||
fn on_budget_exceeded(&self) -> bool {
|
||||
tracing::warn!("taint analysis: worklist budget exceeded, returning partial results");
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl TaintTransfer<'_> {
|
||||
/// Apply a Source label: insert taint for the defined variable.
|
||||
fn apply_source(&self, node: NodeIndex, info: &NodeInfo, bits: Cap, state: &mut TaintState) {
|
||||
if let Some(ref v) = info.defines
|
||||
&& let Some(sym) = self.interner.get(v)
|
||||
{
|
||||
let callee = info.callee.as_deref().unwrap_or("");
|
||||
let source_kind = crate::labels::infer_source_kind(bits, callee);
|
||||
let origin = TaintOrigin { node, source_kind };
|
||||
|
||||
match state.get(sym) {
|
||||
Some(existing) => {
|
||||
let mut new_taint = existing.clone();
|
||||
new_taint.caps |= bits;
|
||||
if new_taint.origins.len() < 4
|
||||
&& !new_taint.origins.iter().any(|o| o.node == node)
|
||||
{
|
||||
new_taint.origins.push(origin);
|
||||
}
|
||||
state.set(sym, new_taint);
|
||||
}
|
||||
None => {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: bits,
|
||||
origins: SmallVec::from_elem(origin, 1),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a Sanitizer label: propagate input taint, then strip sanitizer bits.
|
||||
fn apply_sanitizer(&self, info: &NodeInfo, bits: Cap, state: &mut TaintState) {
|
||||
if let Some(ref v) = info.defines
|
||||
&& let Some(sym) = self.interner.get(v)
|
||||
{
|
||||
let (combined_caps, combined_origins) = self.collect_uses_taint(info, state);
|
||||
let new_caps = combined_caps & !bits;
|
||||
if new_caps.is_empty() {
|
||||
state.remove(sym);
|
||||
} else {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: new_caps,
|
||||
origins: combined_origins,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a function call: resolve callee and compute return taint.
|
||||
fn apply_call(
|
||||
&self,
|
||||
node: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
caller_func: &str,
|
||||
state: &mut TaintState,
|
||||
) {
|
||||
if let Some(ref callee) = info.callee
|
||||
&& let Some(resolved) = self.resolve_callee(callee, caller_func, info.call_ordinal)
|
||||
{
|
||||
let mut return_bits = Cap::empty();
|
||||
let mut return_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
|
||||
|
||||
// 1. Source behaviour
|
||||
if !resolved.source_caps.is_empty() {
|
||||
return_bits |= resolved.source_caps;
|
||||
let callee_str = info.callee.as_deref().unwrap_or("");
|
||||
let source_kind =
|
||||
crate::labels::infer_source_kind(resolved.source_caps, callee_str);
|
||||
let origin = TaintOrigin { node, source_kind };
|
||||
if !return_origins.iter().any(|o| o.node == node) {
|
||||
return_origins.push(origin);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Propagation
|
||||
if resolved.propagates_taint {
|
||||
let (use_caps, use_origins) = self.collect_uses_taint(info, state);
|
||||
return_bits |= use_caps;
|
||||
for orig in &use_origins {
|
||||
if return_origins.len() < 4
|
||||
&& !return_origins.iter().any(|o| o.node == orig.node)
|
||||
{
|
||||
return_origins.push(*orig);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Sanitizer behaviour (applied last so it always wins)
|
||||
return_bits &= !resolved.sanitizer_caps;
|
||||
|
||||
// Write result
|
||||
if let Some(ref v) = info.defines
|
||||
&& let Some(sym) = self.interner.get(v)
|
||||
{
|
||||
if return_bits.is_empty() {
|
||||
state.remove(sym);
|
||||
} else {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: return_bits,
|
||||
origins: return_origins,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Unresolved call — fall through to default gen/kill
|
||||
self.apply_assignment(info, state);
|
||||
}
|
||||
|
||||
/// Default gen/kill: propagate taint through variable assignments.
|
||||
fn apply_assignment(&self, info: &NodeInfo, state: &mut TaintState) {
|
||||
if matches!(
|
||||
info.label,
|
||||
Some(DataLabel::Source(_)) | Some(DataLabel::Sanitizer(_))
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(ref d) = info.defines
|
||||
&& let Some(sym) = self.interner.get(d)
|
||||
{
|
||||
let (combined_caps, combined_origins) = self.collect_uses_taint(info, state);
|
||||
if combined_caps.is_empty() {
|
||||
state.remove(sym);
|
||||
} else {
|
||||
state.set(
|
||||
sym,
|
||||
VarTaint {
|
||||
caps: combined_caps,
|
||||
origins: combined_origins,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect taint from all `uses` variables (union of caps + merge origins).
|
||||
fn collect_uses_taint(
|
||||
&self,
|
||||
info: &NodeInfo,
|
||||
state: &TaintState,
|
||||
) -> (Cap, SmallVec<[TaintOrigin; 2]>) {
|
||||
let mut combined_caps = Cap::empty();
|
||||
let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
|
||||
|
||||
for u in &info.uses {
|
||||
let taint = self.lookup_var(u, state);
|
||||
if let Some(t) = taint {
|
||||
combined_caps |= t.caps;
|
||||
for orig in &t.origins {
|
||||
if combined_origins.len() < 4
|
||||
&& !combined_origins.iter().any(|o| o.node == orig.node)
|
||||
{
|
||||
combined_origins.push(*orig);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(combined_caps, combined_origins)
|
||||
}
|
||||
|
||||
/// Look up a variable's taint, falling back to global_seed for JS two-level solve.
|
||||
fn lookup_var<'a>(&'a self, name: &str, state: &'a TaintState) -> Option<&'a VarTaint> {
|
||||
if let Some(sym) = self.interner.get(name) {
|
||||
if let Some(taint) = state.get(sym) {
|
||||
return Some(taint);
|
||||
}
|
||||
// Fall back to global seed (JS two-level solve)
|
||||
if let Some(seed) = self.global_seed {
|
||||
return seed.get(sym);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Resolve sink caps from label or callee summary.
|
||||
fn resolve_sink_caps(&self, info: &NodeInfo, caller_func: &str) -> Cap {
|
||||
match info.label {
|
||||
Some(DataLabel::Sink(caps)) => caps,
|
||||
_ => info
|
||||
.callee
|
||||
.as_ref()
|
||||
.and_then(|c| self.resolve_callee(c, caller_func, info.call_ordinal))
|
||||
.filter(|r| !r.sink_caps.is_empty())
|
||||
.map(|r| r.sink_caps)
|
||||
.unwrap_or(Cap::empty()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect tainted variables at a sink node.
|
||||
fn collect_tainted_sink_vars(
|
||||
&self,
|
||||
info: &NodeInfo,
|
||||
state: &TaintState,
|
||||
sink_caps: Cap,
|
||||
) -> Vec<(SymbolId, Cap, SmallVec<[TaintOrigin; 2]>)> {
|
||||
let mut result = Vec::new();
|
||||
for u in &info.uses {
|
||||
if let Some(taint) = self.lookup_var(u, state)
|
||||
&& (taint.caps & sink_caps) != Cap::empty()
|
||||
&& let Some(sym) = self.interner.get(u)
|
||||
{
|
||||
result.push((sym, taint.caps, taint.origins.clone()));
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Resolve a callee name to its summary (local → global → interop).
|
||||
fn resolve_callee(
|
||||
&self,
|
||||
callee: &str,
|
||||
caller_func: &str,
|
||||
call_ordinal: u32,
|
||||
) -> Option<ResolvedSummary> {
|
||||
let normalized = normalize_callee_name(callee);
|
||||
|
||||
// 1) Local (same-file)
|
||||
let local_matches: Vec<_> = self
|
||||
.local_summaries
|
||||
.iter()
|
||||
.filter(|(k, _)| {
|
||||
k.name == normalized && k.lang == self.lang && k.namespace == self.namespace
|
||||
})
|
||||
.collect();
|
||||
|
||||
if local_matches.len() == 1 {
|
||||
let (_, ls) = local_matches[0];
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: ls.source_caps,
|
||||
sanitizer_caps: ls.sanitizer_caps,
|
||||
sink_caps: ls.sink_caps,
|
||||
propagates_taint: ls.propagates_taint,
|
||||
});
|
||||
}
|
||||
if local_matches.len() > 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// 2) Global same-language
|
||||
if let Some(gs) = self.global_summaries {
|
||||
match gs.resolve_callee_key(normalized, self.lang, self.namespace, None) {
|
||||
CalleeResolution::Resolved(target_key) => {
|
||||
if let Some(fs) = gs.get(&target_key) {
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: fs.source_caps(),
|
||||
sanitizer_caps: fs.sanitizer_caps(),
|
||||
sink_caps: fs.sink_caps(),
|
||||
propagates_taint: fs.propagates_taint,
|
||||
});
|
||||
}
|
||||
}
|
||||
CalleeResolution::NotFound | CalleeResolution::Ambiguous(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
// 3) Interop edges
|
||||
for edge in self.interop_edges {
|
||||
if edge.from.caller_lang == self.lang
|
||||
&& edge.from.caller_namespace == self.namespace
|
||||
&& edge.from.callee_symbol == callee
|
||||
&& (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_func)
|
||||
&& (edge.from.ordinal == 0 || edge.from.ordinal == call_ordinal)
|
||||
&& let Some(gs) = self.global_summaries
|
||||
&& let Some(fs) = gs.get(&edge.to)
|
||||
{
|
||||
return Some(ResolvedSummary {
|
||||
source_caps: fs.source_caps(),
|
||||
sanitizer_caps: fs.sanitizer_caps(),
|
||||
sink_caps: fs.sink_caps(),
|
||||
propagates_taint: fs.propagates_taint,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolved summary for a callee.
|
||||
struct ResolvedSummary {
|
||||
source_caps: Cap,
|
||||
sanitizer_caps: Cap,
|
||||
sink_caps: Cap,
|
||||
propagates_taint: bool,
|
||||
}
|
||||
|
|
@ -61,6 +61,10 @@ pub struct ScannerConfig {
|
|||
/// benchmarks, etc.) at their original severity. When false (default),
|
||||
/// findings in these paths are downgraded by one severity tier.
|
||||
pub include_nonprod: bool,
|
||||
|
||||
/// Enable the state-model dataflow engine for resource lifecycle and
|
||||
/// auth-state analysis. Default: false (opt-in).
|
||||
pub enable_state_analysis: bool,
|
||||
}
|
||||
impl Default for ScannerConfig {
|
||||
fn default() -> Self {
|
||||
|
|
@ -94,6 +98,7 @@ impl Default for ScannerConfig {
|
|||
follow_symlinks: false,
|
||||
scan_hidden_files: false,
|
||||
include_nonprod: false,
|
||||
enable_state_analysis: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -135,6 +140,60 @@ pub struct OutputConfig {
|
|||
|
||||
/// The maximum number of results to show.
|
||||
pub max_results: Option<u32>,
|
||||
|
||||
/// Enable attack-surface ranking to sort findings by exploitability.
|
||||
pub attack_surface_ranking: bool,
|
||||
|
||||
/// Minimum attack-surface score to include in output.
|
||||
/// Findings below this threshold are dropped after ranking.
|
||||
/// `None` means no minimum (all findings shown).
|
||||
pub min_score: Option<u32>,
|
||||
|
||||
/// Minimum confidence level to include in output.
|
||||
/// `None` means no minimum (all findings shown).
|
||||
#[serde(
|
||||
default,
|
||||
skip_serializing_if = "Option::is_none",
|
||||
deserialize_with = "deserialize_confidence_opt"
|
||||
)]
|
||||
pub min_confidence: Option<crate::evidence::Confidence>,
|
||||
|
||||
/// Include Quality-category findings (excluded by default).
|
||||
#[serde(default)]
|
||||
pub include_quality: bool,
|
||||
|
||||
/// Show all findings: disables category filtering, rollups, and LOW budgets.
|
||||
#[serde(default)]
|
||||
pub show_all: bool,
|
||||
|
||||
/// Maximum total LOW findings to show.
|
||||
#[serde(default = "default_max_low")]
|
||||
pub max_low: u32,
|
||||
|
||||
/// Maximum LOW findings per file.
|
||||
#[serde(default = "default_max_low_per_file")]
|
||||
pub max_low_per_file: u32,
|
||||
|
||||
/// Maximum LOW findings per rule.
|
||||
#[serde(default = "default_max_low_per_rule")]
|
||||
pub max_low_per_rule: u32,
|
||||
|
||||
/// Number of example locations to store in rollup findings.
|
||||
#[serde(default = "default_rollup_examples")]
|
||||
pub rollup_examples: u32,
|
||||
}
|
||||
|
||||
/// Serde fallback for `OutputConfig::max_low` (maximum total LOW findings to
/// show), referenced via `#[serde(default = "default_max_low")]`.
///
/// `impl Default for OutputConfig` repeats this value as a literal; keep the
/// two in sync when changing it.
fn default_max_low() -> u32 {
|
||||
20
|
||||
}
|
||||
/// Serde fallback for `OutputConfig::max_low_per_file` (maximum LOW findings
/// per file), referenced via `#[serde(default = "default_max_low_per_file")]`.
///
/// `impl Default for OutputConfig` repeats this value as a literal; keep the
/// two in sync when changing it.
fn default_max_low_per_file() -> u32 {
|
||||
1
|
||||
}
|
||||
/// Serde fallback for `OutputConfig::max_low_per_rule` (maximum LOW findings
/// per rule), referenced via `#[serde(default = "default_max_low_per_rule")]`.
///
/// `impl Default for OutputConfig` repeats this value as a literal; keep the
/// two in sync when changing it.
fn default_max_low_per_rule() -> u32 {
|
||||
10
|
||||
}
|
||||
/// Serde fallback for `OutputConfig::rollup_examples` (number of example
/// locations stored in rollup findings), referenced via
/// `#[serde(default = "default_rollup_examples")]`.
///
/// `impl Default for OutputConfig` repeats this value as a literal; keep the
/// two in sync when changing it.
fn default_rollup_examples() -> u32 {
|
||||
5
|
||||
}
|
||||
|
||||
impl Default for OutputConfig {
|
||||
|
|
@ -143,10 +202,36 @@ impl Default for OutputConfig {
|
|||
default_format: "console".into(),
|
||||
quiet: false,
|
||||
max_results: None,
|
||||
attack_surface_ranking: true,
|
||||
min_score: None,
|
||||
min_confidence: None,
|
||||
include_quality: false,
|
||||
show_all: false,
|
||||
max_low: 20,
|
||||
max_low_per_file: 1,
|
||||
max_low_per_rule: 10,
|
||||
rollup_examples: 5,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserialize an optional `Confidence` from an optional string value
/// (e.g. a TOML string).
///
/// Used as the `deserialize_with` hook on `OutputConfig::min_confidence`.
///
/// # Returns
/// * `Ok(None)` when the value deserializes to `None`.
/// * `Ok(Some(confidence))` when the string parses as a `Confidence`.
///
/// # Errors
/// Propagates the deserializer's own error if the value is not an optional
/// string, and wraps a failed `Confidence` parse in a custom serde error.
|
||||
fn deserialize_confidence_opt<'de, D>(
|
||||
deserializer: D,
|
||||
) -> Result<Option<crate::evidence::Confidence>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
// Read the raw value as an optional string first; it is parsed below.
let opt: Option<String> = Option::deserialize(deserializer)?;
|
||||
match opt {
|
||||
None => Ok(None),
|
||||
// Parse the string form and surface any parse failure as a serde error.
Some(s) => s
|
||||
.parse::<crate::evidence::Confidence>()
|
||||
.map(Some)
|
||||
.map_err(serde::de::Error::custom),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(default)]
|
||||
pub struct PerformanceConfig {
|
||||
|
|
@ -303,6 +388,7 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
default.scanner.follow_symlinks = user.scanner.follow_symlinks;
|
||||
default.scanner.scan_hidden_files = user.scanner.scan_hidden_files;
|
||||
default.scanner.include_nonprod = user.scanner.include_nonprod;
|
||||
default.scanner.enable_state_analysis = user.scanner.enable_state_analysis;
|
||||
|
||||
// Merge exclusion lists (default ⊔ user), then sort & dedupe
|
||||
default
|
||||
|
|
@ -328,6 +414,15 @@ fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
default.output.default_format = user.output.default_format;
|
||||
default.output.quiet = user.output.quiet;
|
||||
default.output.max_results = user.output.max_results;
|
||||
default.output.attack_surface_ranking = user.output.attack_surface_ranking;
|
||||
default.output.min_score = user.output.min_score;
|
||||
default.output.min_confidence = user.output.min_confidence;
|
||||
default.output.include_quality = user.output.include_quality;
|
||||
default.output.show_all = user.output.show_all;
|
||||
default.output.max_low = user.output.max_low;
|
||||
default.output.max_low_per_file = user.output.max_low_per_file;
|
||||
default.output.max_low_per_rule = user.output.max_low_per_rule;
|
||||
default.output.rollup_examples = user.output.rollup_examples;
|
||||
|
||||
// --- PerformanceConfig ---
|
||||
default.performance.max_depth = user.performance.max_depth;
|
||||
|
|
|
|||
|
|
@ -147,8 +147,8 @@ pub fn spawn_file_walker(root: &Path, cfg: &Config) -> (Receiver<Paths>, JoinHan
|
|||
#[test]
|
||||
fn walker_respects_excluded_extensions() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
std::fs::write(tmp.path().join("keep.rs"), "fn main(){}").unwrap();
|
||||
std::fs::write(tmp.path().join("skip.txt"), "ignored").unwrap();
|
||||
std::fs::write(tmp.path().join("keep.rs"), "fn main(){}").unwrap(); // nyx:ignore cfg-unguarded-sink
|
||||
std::fs::write(tmp.path().join("skip.txt"), "ignored").unwrap(); // nyx:ignore cfg-unguarded-sink
|
||||
|
||||
let mut cfg = Config::default();
|
||||
cfg.scanner.excluded_extensions = vec!["txt".into()];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue