fixed some dynamic and static bugs and failing test cases

This commit is contained in:
elipeter 2026-06-03 15:56:00 -05:00
parent 20093972a9
commit ddf9ff13e2
10 changed files with 215 additions and 35 deletions

View file

@ -1003,6 +1003,7 @@ fn is_test_suppressible_pattern(id: &str) -> bool {
// deterministic test data, insecure RNG used for fixture seeding.
id.ends_with(".secrets.hardcoded_secret")
|| id.ends_with(".secrets.hardcoded_key")
|| id.ends_with(".crypto.hardcoded_key")
|| id.ends_with(".crypto.math_random")
|| id.ends_with(".crypto.insecure_random")
|| id.ends_with(".crypto.weak_digest")
@ -5484,14 +5485,14 @@ struct TaintSuppressionCtx {
/// 11 inline analysis but the sink's enclosing scope has no
/// labelled Sanitizer of its own.
interproc_sanitizer_callers: HashSet<Option<String>>,
/// Union of resolved sink-cap bits across every taint / structural
/// flow finding (`taint-*`, `cfg-unguarded-sink`) at each line. Used
/// by [`Self::is_redundant_ast_pattern`] to drop an AST-pattern finding
/// that merely restates a flow the taint engine already reported at the
/// same line with the same cap — the flow finding carries strictly more
/// evidence (source, path, sanitizer state), so keeping the bare pattern
/// alongside it is pure duplicate noise.
taint_finding_caps_by_line: HashMap<usize, u32>,
/// Union of resolved sink-cap bits for cap-specific taint findings at
/// each line. Used by [`Self::is_redundant_ast_pattern`] to drop an
/// AST-pattern finding only when the flow engine already emitted a
/// specific rule id for the same vulnerability class. Legacy generic
/// findings (`taint-unsanitised-flow`, `cfg-unguarded-sink`) are not
/// canonical enough to subsume language-specific AST rule IDs such as
/// `py.cmdi.subprocess_shell` or `c.cmdi.system`.
specific_taint_finding_caps_by_line: HashMap<usize, u32>,
}
impl TaintSuppressionCtx {
@ -5690,15 +5691,21 @@ impl TaintSuppressionCtx {
.map(|d| d.line)
.collect();
// Cap bits per line for every flow-backed finding (taint-* and the
// structural unguarded-sink finding), so a redundant AST pattern at
// the same line+cap can be dropped in favour of the richer flow.
let mut taint_finding_caps_by_line: HashMap<usize, u32> = HashMap::new();
// Cap bits per line for cap-specific flow-backed findings only, so a
// redundant AST pattern at the same line+cap can be dropped in favour
// of the richer flow. Do not count legacy generic findings here:
// `taint-unsanitised-flow` and `cfg-unguarded-sink` carry evidence,
// but their rule ids are deliberately catch-alls, while AST `cmdi`,
// `sqli`, etc. IDs are the canonical namespace many tests, SARIF
// consumers, and dynamic-verification spec derivation rely on.
let mut specific_taint_finding_caps_by_line: HashMap<usize, u32> = HashMap::new();
for d in taint_diags {
if d.id.starts_with("taint-") || d.id == "cfg-unguarded-sink" {
if d.id.starts_with("taint-") && !d.id.starts_with("taint-unsanitised-flow") {
if let Some(caps) = d.evidence.as_ref().map(|e| e.sink_caps) {
if caps != 0 {
*taint_finding_caps_by_line.entry(d.line).or_default() |= caps;
*specific_taint_finding_caps_by_line
.entry(d.line)
.or_default() |= caps;
}
}
}
@ -5727,7 +5734,7 @@ impl TaintSuppressionCtx {
engine_validated_funcs,
source_killed_funcs,
interproc_sanitizer_callers,
taint_finding_caps_by_line,
specific_taint_finding_caps_by_line,
}
}
@ -5746,7 +5753,7 @@ impl TaintSuppressionCtx {
let Some(cap) = pattern_category_cap(pattern_id) else {
return false;
};
self.taint_finding_caps_by_line
self.specific_taint_finding_caps_by_line
.get(&line)
.is_some_and(|caps| caps & cap.bits() != 0)
}

View file

@ -902,6 +902,24 @@ fn is_self_scoped_session_base(base: &str) -> bool {
| "ctx.session.currentUser"
| "ctx.state.user"
| "ctx.state.currentUser"
// The caller's own id from the session is self-scoped: fetching
// your own record with it is not IDOR (only a foreign,
// request-supplied id is). The `.user` forms above missed the
// `req.session.userId` / `session.uid` idiom.
| "req.session.userId"
| "request.session.userId"
| "session.userId"
| "req.session.userid"
| "request.session.userid"
| "session.userid"
| "req.session.uid"
| "request.session.uid"
| "session.uid"
| "ctx.session.userId"
| "ctx.session.userid"
| "ctx.session.uid"
| "ctx.state.userId"
| "ctx.state.uid"
)
}

View file

@ -3948,6 +3948,27 @@ fn collect_param_names(
}
}
}
// TypeScript `required_parameter` / `optional_parameter`. Descend only
// into the binding `pattern`, never the `type` annotation: the default
// arm harvests id-like names from object-type fields (`user: { id }`)
// and lifts typed-bounded scalar ids (`UserId: number`) into
// `unit.params`, over-firing the user-input gate on non-route helpers.
// Mirrors the Rust `parameter` arm plus the Go/Python id-like filter.
"required_parameter" | "optional_parameter" => {
if let Some(pattern) = node.child_by_field_name("pattern") {
if pattern.kind() == "identifier" && node.child_by_field_name("type").is_some() {
let name = text(pattern, bytes);
if !name.is_empty()
&& !out.contains(&name)
&& (include_id_like_typed || !is_python_id_like_typed_param(&name))
{
out.push(name);
}
} else {
collect_param_names(pattern, bytes, include_id_like_typed, out);
}
}
}
_ => {
for idx in 0..node.named_child_count() {
let Some(child) = node.named_child(idx as u32) else {

View file

@ -221,11 +221,8 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
LabelRule {
matchers: &["res.redirect"],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// `res.redirect` is OPEN_REDIRECT only (see the dedicated rule below): a
// 302 to the browser is client-side navigation, not SSRF.
LabelRule {
matchers: &["res.sendFile", "res.download"],
label: DataLabel::Sink(Cap::FILE_IO),
@ -911,6 +908,37 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["url", "prefixUrl"],
},
},
// `request` npm library: `request.get(url)` / `request.post(url, …)`. The
// Destination gate fires only on a tainted URL arg, so the `req.get(header)`
// header-read collision (constant arg 0) never activates.
SinkGate {
callee_matcher: "request.get",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["url", "uri"],
},
},
SinkGate {
callee_matcher: "request.post",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["url", "uri"],
},
},
// `undici.request(url | opts[, opts])`, opts exposes `origin` and
// `path`. Body-ish fields (`body`, `headers`) are excluded.
SinkGate {

View file

@ -670,6 +670,31 @@ pub fn is_js_ts_handler_param_name(name: &str) -> bool {
false
}
/// Bare bindings denoting an Express/Koa request sub-object when the handler
/// param is destructured (`({ query }, res) => …`). Kept out of
/// [`is_js_ts_handler_param_name`] so a plain param named `query`/`body` is
/// never seeded; the SSA seeder additionally requires a sibling response param.
const JS_TS_REQUEST_FIELD_NAMES: &[&str] =
&["query", "body", "params", "headers", "cookies", "cookie"];
/// True when `name` is a bare destructured request-field binding. Only
/// meaningful behind the destructured-handler-param gate in the SSA seeder.
pub fn is_express_request_field_name(name: &str) -> bool {
JS_TS_REQUEST_FIELD_NAMES
.iter()
.any(|candidate| candidate.eq_ignore_ascii_case(name))
}
/// True for the conventional Express/Koa/Fastify response-object parameter
/// (`res`/`response`/`reply`) — the structural signal that a function is a
/// route handler, so a sibling destructured `{ query }` is a real source.
pub fn is_handler_response_param_name(name: &str) -> bool {
matches!(
name.to_ascii_lowercase().as_str(),
"res" | "response" | "reply"
)
}
#[inline(always)]
pub fn lookup(lang: &str, raw: &str) -> Kind {
CLASSIFIERS

View file

@ -528,13 +528,12 @@ pub static GATED_SINKS: &[SinkGate] = &[
// is a `Location: ...` header, so the dashboard / OWASP bucket
// correctly classifies redirect-class flows independently of CRLF.
//
// Activation: arg 0 prefix `Location:` (case-insensitive). When arg
// 0 is a constant string starting with `Location:` the gate fires and
// checks payload arg 0 for taint; constants like `Content-Type: ...`
// are suppressed by the safe-literal branch. When arg 0 is a binary
// expression (`"Location: " . $url`) or otherwise dynamic, the
// value-extraction returns `None` and the gate fires conservatively
// — matching the existing convention in `setAttribute`/`parseFromString`.
// Fires only on a positive `Location:` literal at arg 0 (a constant, or a
// concat whose leading literal is `Location:` — `extract_const_string_arg`
// returns the left-most literal). `LiteralOnly` makes the dynamic/unknown
// case suppress rather than fire conservatively, so `header($notALocation)`
// and 404-status-line forms no longer mis-classify as OPEN_REDIRECT. The
// flat HEADER_INJECTION sink above still fires on any tainted `header()`.
SinkGate {
callee_matcher: "=header",
arg_index: 0,
@ -545,7 +544,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
activation: GateActivation::LiteralOnly,
},
// Smarty `$smarty->fetch($name)` — only the `string:` resource prefix
// accepts an inline template *source*; the bare form (`page.tpl`) is a

View file

@ -186,11 +186,8 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
LabelRule {
matchers: &["res.redirect"],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// `res.redirect` is OPEN_REDIRECT only (dedicated rule below): a 302 to the
// browser is client-side navigation, not SSRF.
LabelRule {
matchers: &["res.sendFile", "res.download"],
label: DataLabel::Sink(Cap::FILE_IO),
@ -693,6 +690,36 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["url", "prefixUrl"],
},
},
// `request` npm library: `request.get(url)` / `request.post(url, …)`.
// Destination gate fires only on a tainted URL arg. Mirrors javascript.rs.
SinkGate {
callee_matcher: "request.get",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["url", "uri"],
},
},
SinkGate {
callee_matcher: "request.post",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["url", "uri"],
},
},
SinkGate {
callee_matcher: "undici.request",
arg_index: 0,

View file

@ -162,6 +162,24 @@ pub const PATTERNS: &[Pattern] = &[
category: PatternCategory::Secrets,
confidence: Confidence::Medium,
},
// ── Tier A: Hardcoded cryptographic key/secret config ──────────────
// Crypto-key-shaped keys (`cookieSecret`, `cryptoKey`, `signingKey`, …) the
// anchored `hardcoded_secret` regex misses. Emits a `crypto`-bucketing id
// (a `*.secrets.*` id buckets as `other`). Benign `publicKey`/`primaryKey`/
// `keyName`/bare `key` are rejected by the prefix requirement.
Pattern {
id: "js.crypto.hardcoded_key",
description: "Hardcoded cryptographic key/secret in source config",
query: r#"(pair
key: (property_identifier) @key
(#match? @key "(?i)^([a-z0-9]+secret|(crypto|cookie|session|signing|encryption|encrypt|private|master|jwt|hmac|secret)key|api[_-]?key|access[_-]?key|secret[_-]?key|private[_-]?key|encryption[_-]?key|signing[_-]?key)$")
value: (string) @val (#match? @val "[^\"']{3,}"))
@vuln"#,
severity: Severity::Low,
tier: PatternTier::A,
category: PatternCategory::Crypto,
confidence: Confidence::Medium,
},
// ── Tier A: Open redirect ──────────────────────────────────────────
Pattern {
id: "js.xss.location_assign",

View file

@ -133,6 +133,22 @@ pub const PATTERNS: &[Pattern] = &[
category: PatternCategory::Secrets,
confidence: Confidence::Medium,
},
// ── Tier A: Hardcoded cryptographic key/secret config ──────────────
// Crypto-key-shaped keys the anchored `hardcoded_secret` regex misses;
// emits a `crypto`-bucketing rule id. See javascript.rs for rationale.
Pattern {
id: "ts.crypto.hardcoded_key",
description: "Hardcoded cryptographic key/secret in source config",
query: r#"(pair
key: (property_identifier) @key
(#match? @key "(?i)^([a-z0-9]+secret|(crypto|cookie|session|signing|encryption|encrypt|private|master|jwt|hmac|secret)key|api[_-]?key|access[_-]?key|secret[_-]?key|private[_-]?key|encryption[_-]?key|signing[_-]?key)$")
value: (string) @val (#match? @val "[^\"']{3,}"))
@vuln"#,
severity: Severity::Low,
tier: PatternTier::A,
category: PatternCategory::Crypto,
confidence: Confidence::Medium,
},
// ── Tier A: TypeScript-specific type-safety escapes ────────────────
Pattern {
id: "ts.quality.any_annotation",

View file

@ -5981,7 +5981,28 @@ pub(super) fn transfer_inst(
.split_once('.')
.map(|(root, _)| crate::labels::is_js_ts_handler_param_name(root))
.unwrap_or(false);
if crate::labels::is_js_ts_handler_param_name(var_name) || root_is_handler {
// Destructured Express request param (`({ query }, res) =>
// …`): `query` lowers as a bare `Param`, so the textual
// `req.query` source label never matches. Seed it only when
// a sibling response param is present (the route-handler
// signal), so a plain `paginate(query)` stays un-seeded.
let is_destructured_request_field =
crate::labels::is_express_request_field_name(var_name) && {
let eb = &ssa.blocks[ssa.entry.0 as usize];
eb.phis.iter().chain(eb.body.iter()).any(|i| {
matches!(i.op, SsaOp::Param { .. })
&& ssa
.value_defs
.get(i.value.0 as usize)
.and_then(|vd| vd.var_name.as_deref())
.map(crate::labels::is_handler_response_param_name)
.unwrap_or(false)
})
};
if crate::labels::is_js_ts_handler_param_name(var_name)
|| root_is_handler
|| is_destructured_request_field
{
let origin = TaintOrigin {
node: inst.cfg_node,
source_kind: SourceKind::UserInput,