From ddf9ff13e22fcf46f3f4af2e654f9caac2567de8 Mon Sep 17 00:00:00 2001 From: elipeter Date: Wed, 3 Jun 2026 15:56:00 -0500 Subject: [PATCH] fixed some dynamic and static bugs and failing test cases --- src/ast.rs | 39 +++++++++++++++++------------ src/auth_analysis/checks.rs | 18 +++++++++++++ src/auth_analysis/extract/common.rs | 21 ++++++++++++++++ src/labels/javascript.rs | 38 ++++++++++++++++++++++++---- src/labels/mod.rs | 25 ++++++++++++++++++ src/labels/php.rs | 15 ++++++----- src/labels/typescript.rs | 37 +++++++++++++++++++++++---- src/patterns/javascript.rs | 18 +++++++++++++ src/patterns/typescript.rs | 16 ++++++++++++ src/taint/ssa_transfer/mod.rs | 23 ++++++++++++++++- 10 files changed, 215 insertions(+), 35 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 7e1e791b..a61ba78a 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1003,6 +1003,7 @@ fn is_test_suppressible_pattern(id: &str) -> bool { // deterministic test data, insecure RNG used for fixture seeding. id.ends_with(".secrets.hardcoded_secret") || id.ends_with(".secrets.hardcoded_key") + || id.ends_with(".crypto.hardcoded_key") || id.ends_with(".crypto.math_random") || id.ends_with(".crypto.insecure_random") || id.ends_with(".crypto.weak_digest") @@ -5484,14 +5485,14 @@ struct TaintSuppressionCtx { /// 11 inline analysis but the sink's enclosing scope has no /// labelled Sanitizer of its own. interproc_sanitizer_callers: HashSet>, - /// Union of resolved sink-cap bits across every taint / structural - /// flow finding (`taint-*`, `cfg-unguarded-sink`) at each line. Used - /// by [`Self::is_redundant_ast_pattern`] to drop an AST-pattern finding - /// that merely restates a flow the taint engine already reported at the - /// same line with the same cap — the flow finding carries strictly more - /// evidence (source, path, sanitizer state), so keeping the bare pattern - /// alongside it is pure duplicate noise. - taint_finding_caps_by_line: HashMap, + /// Union of resolved sink-cap bits for cap-specific taint findings at + /// each line. Used by [`Self::is_redundant_ast_pattern`] to drop an + /// AST-pattern finding only when the flow engine already emitted a + /// specific rule id for the same vulnerability class. Legacy generic + /// findings (`taint-unsanitised-flow`, `cfg-unguarded-sink`) are not + /// canonical enough to subsume language-specific AST rule IDs such as + /// `py.cmdi.subprocess_shell` or `c.cmdi.system`. + specific_taint_finding_caps_by_line: HashMap, } impl TaintSuppressionCtx { @@ -5690,15 +5691,21 @@ impl TaintSuppressionCtx { .map(|d| d.line) .collect(); - // Cap bits per line for every flow-backed finding (taint-* and the - // structural unguarded-sink finding), so a redundant AST pattern at - // the same line+cap can be dropped in favour of the richer flow. - let mut taint_finding_caps_by_line: HashMap = HashMap::new(); + // Cap bits per line for cap-specific flow-backed findings only, so a + // redundant AST pattern at the same line+cap can be dropped in favour + // of the richer flow. Do not count legacy generic findings here: + // `taint-unsanitised-flow` and `cfg-unguarded-sink` carry evidence, + // but their rule ids are deliberately catch-alls, while AST `cmdi`, + // `sqli`, etc. IDs are the canonical namespace many tests, SARIF + // consumers, and dynamic-verification spec derivation rely on. + let mut specific_taint_finding_caps_by_line: HashMap = HashMap::new(); for d in taint_diags { - if d.id.starts_with("taint-") || d.id == "cfg-unguarded-sink" { + if d.id.starts_with("taint-") && !d.id.starts_with("taint-unsanitised-flow") { if let Some(caps) = d.evidence.as_ref().map(|e| e.sink_caps) { if caps != 0 { - *taint_finding_caps_by_line.entry(d.line).or_default() |= caps; + *specific_taint_finding_caps_by_line + .entry(d.line) + .or_default() |= caps; } } } @@ -5727,7 +5734,7 @@ impl TaintSuppressionCtx { engine_validated_funcs, source_killed_funcs, interproc_sanitizer_callers, - taint_finding_caps_by_line, + specific_taint_finding_caps_by_line, } } @@ -5746,7 +5753,7 @@ impl TaintSuppressionCtx { let Some(cap) = pattern_category_cap(pattern_id) else { return false; }; - self.taint_finding_caps_by_line + self.specific_taint_finding_caps_by_line .get(&line) .is_some_and(|caps| caps & cap.bits() != 0) } diff --git a/src/auth_analysis/checks.rs b/src/auth_analysis/checks.rs index 3a122eed..2ff7189b 100644 --- a/src/auth_analysis/checks.rs +++ b/src/auth_analysis/checks.rs @@ -902,6 +902,24 @@ fn is_self_scoped_session_base(base: &str) -> bool { | "ctx.session.currentUser" | "ctx.state.user" | "ctx.state.currentUser" + // The caller's own id from the session is self-scoped: fetching + // your own record with it is not IDOR (only a foreign, + // request-supplied id is). The `.user` forms above missed the + // `req.session.userId` / `session.uid` idiom. + | "req.session.userId" + | "request.session.userId" + | "session.userId" + | "req.session.userid" + | "request.session.userid" + | "session.userid" + | "req.session.uid" + | "request.session.uid" + | "session.uid" + | "ctx.session.userId" + | "ctx.session.userid" + | "ctx.session.uid" + | "ctx.state.userId" + | "ctx.state.uid" ) } diff --git a/src/auth_analysis/extract/common.rs b/src/auth_analysis/extract/common.rs index 355f31b2..901678cd 100644 --- a/src/auth_analysis/extract/common.rs +++ b/src/auth_analysis/extract/common.rs @@ -3948,6 +3948,27 @@ fn collect_param_names( } } } + // TypeScript `required_parameter` / `optional_parameter`. Descend only + // into the binding `pattern`, never the `type` annotation: the default + // arm harvests id-like names from object-type fields (`user: { id }`) + // and lifts typed-bounded scalar ids (`UserId: number`) into + // `unit.params`, over-firing the user-input gate on non-route helpers. + // Mirrors the Rust `parameter` arm plus the Go/Python id-like filter. + "required_parameter" | "optional_parameter" => { + if let Some(pattern) = node.child_by_field_name("pattern") { + if pattern.kind() == "identifier" && node.child_by_field_name("type").is_some() { + let name = text(pattern, bytes); + if !name.is_empty() + && !out.contains(&name) + && (include_id_like_typed || !is_python_id_like_typed_param(&name)) + { + out.push(name); + } + } else { + collect_param_names(pattern, bytes, include_id_like_typed, out); + } + } + } _ => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { diff --git a/src/labels/javascript.rs b/src/labels/javascript.rs index fae5a878..c3c4bb8f 100644 --- a/src/labels/javascript.rs +++ b/src/labels/javascript.rs @@ -221,11 +221,8 @@ pub static RULES: &[LabelRule] = &[ label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: false, }, - LabelRule { - matchers: &["res.redirect"], - label: DataLabel::Sink(Cap::SSRF), - case_sensitive: false, - }, + // `res.redirect` is OPEN_REDIRECT only (see the dedicated rule below): a + // 302 to the browser is client-side navigation, not SSRF. LabelRule { matchers: &["res.sendFile", "res.download"], label: DataLabel::Sink(Cap::FILE_IO), @@ -911,6 +908,37 @@ pub static GATED_SINKS: &[SinkGate] = &[ object_destination_fields: &["url", "prefixUrl"], }, }, + // `request` npm library: `request.get(url)` / `request.post(url, …)`. The + // Destination gate fires only on a tainted URL arg, so the `req.get(header)` + // header-read collision (constant arg 0) never activates. + SinkGate { + callee_matcher: "request.get", + arg_index: 0, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SSRF), + case_sensitive: false, + payload_args: &[0], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &["url", "uri"], + }, + }, + SinkGate { + callee_matcher: "request.post", + arg_index: 0, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SSRF), + case_sensitive: false, + payload_args: &[0], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &["url", "uri"], + }, + }, // `undici.request(url | opts[, opts])`, opts exposes `origin` and // `path`. Body-ish fields (`body`, `headers`) are excluded. SinkGate { diff --git a/src/labels/mod.rs b/src/labels/mod.rs index 2045e64b..f589a5a4 100644 --- a/src/labels/mod.rs +++ b/src/labels/mod.rs @@ -670,6 +670,31 @@ pub fn is_js_ts_handler_param_name(name: &str) -> bool { false } +/// Bare bindings denoting an Express/Koa request sub-object when the handler +/// param is destructured (`({ query }, res) => …`). Kept out of +/// [`is_js_ts_handler_param_name`] so a plain param named `query`/`body` is +/// never seeded; the SSA seeder additionally requires a sibling response param. +const JS_TS_REQUEST_FIELD_NAMES: &[&str] = + &["query", "body", "params", "headers", "cookies", "cookie"]; + +/// True when `name` is a bare destructured request-field binding. Only +/// meaningful behind the destructured-handler-param gate in the SSA seeder. +pub fn is_express_request_field_name(name: &str) -> bool { + JS_TS_REQUEST_FIELD_NAMES + .iter() + .any(|candidate| candidate.eq_ignore_ascii_case(name)) +} + +/// True for the conventional Express/Koa/Fastify response-object parameter +/// (`res`/`response`/`reply`) — the structural signal that a function is a +/// route handler, so a sibling destructured `{ query }` is a real source. +pub fn is_handler_response_param_name(name: &str) -> bool { + matches!( + name.to_ascii_lowercase().as_str(), + "res" | "response" | "reply" + ) +} + #[inline(always)] pub fn lookup(lang: &str, raw: &str) -> Kind { CLASSIFIERS diff --git a/src/labels/php.rs b/src/labels/php.rs index 23ca51ef..b0f977b9 100644 --- a/src/labels/php.rs +++ b/src/labels/php.rs @@ -528,13 +528,12 @@ pub static GATED_SINKS: &[SinkGate] = &[ // is a `Location: ...` header, so the dashboard / OWASP bucket // correctly classifies redirect-class flows independently of CRLF. // - // Activation: arg 0 prefix `Location:` (case-insensitive). When arg - // 0 is a constant string starting with `Location:` the gate fires and - // checks payload arg 0 for taint; constants like `Content-Type: ...` - // are suppressed by the safe-literal branch. When arg 0 is a binary - // expression (`"Location: " . $url`) or otherwise dynamic, the - // value-extraction returns `None` and the gate fires conservatively - // — matching the existing convention in `setAttribute`/`parseFromString`. + // Fires only on a positive `Location:` literal at arg 0 (a constant, or a + // concat whose leading literal is `Location:` — `extract_const_string_arg` + // returns the left-most literal). `LiteralOnly` makes the dynamic/unknown + // case suppress rather than fire conservatively, so `header($notALocation)` + // and 404-status-line forms no longer mis-classify as OPEN_REDIRECT. The + // flat HEADER_INJECTION sink above still fires on any tainted `header()`. SinkGate { callee_matcher: "=header", arg_index: 0, @@ -545,7 +544,7 @@ pub static GATED_SINKS: &[SinkGate] = &[ payload_args: &[0], keyword_name: None, dangerous_kwargs: &[], - activation: GateActivation::ValueMatch, + activation: GateActivation::LiteralOnly, }, // Smarty `$smarty->fetch($name)` — only the `string:` resource prefix // accepts an inline template *source*; the bare form (`page.tpl`) is a diff --git a/src/labels/typescript.rs b/src/labels/typescript.rs index 79d763f7..41dfaa12 100644 --- a/src/labels/typescript.rs +++ b/src/labels/typescript.rs @@ -186,11 +186,8 @@ pub static RULES: &[LabelRule] = &[ label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: false, }, - LabelRule { - matchers: &["res.redirect"], - label: DataLabel::Sink(Cap::SSRF), - case_sensitive: false, - }, + // `res.redirect` is OPEN_REDIRECT only (dedicated rule below): a 302 to the + // browser is client-side navigation, not SSRF. LabelRule { matchers: &["res.sendFile", "res.download"], label: DataLabel::Sink(Cap::FILE_IO), @@ -693,6 +690,36 @@ pub static GATED_SINKS: &[SinkGate] = &[ object_destination_fields: &["url", "prefixUrl"], }, }, + // `request` npm library: `request.get(url)` / `request.post(url, …)`. + // Destination gate fires only on a tainted URL arg. Mirrors javascript.rs. + SinkGate { + callee_matcher: "request.get", + arg_index: 0, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SSRF), + case_sensitive: false, + payload_args: &[0], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &["url", "uri"], + }, + }, + SinkGate { + callee_matcher: "request.post", + arg_index: 0, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SSRF), + case_sensitive: false, + payload_args: &[0], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &["url", "uri"], + }, + }, SinkGate { callee_matcher: "undici.request", arg_index: 0, diff --git a/src/patterns/javascript.rs b/src/patterns/javascript.rs index 6f720009..f4d1029a 100644 --- a/src/patterns/javascript.rs +++ b/src/patterns/javascript.rs @@ -162,6 +162,24 @@ pub const PATTERNS: &[Pattern] = &[ category: PatternCategory::Secrets, confidence: Confidence::Medium, }, + // ── Tier A: Hardcoded cryptographic key/secret config ────────────── + // Crypto-key-shaped keys (`cookieSecret`, `cryptoKey`, `signingKey`, …) the + // anchored `hardcoded_secret` regex misses. Emits a `crypto`-bucketing id + // (a `*.secrets.*` id buckets as `other`). Benign `publicKey`/`primaryKey`/ + // `keyName`/bare `key` are rejected by the prefix requirement. + Pattern { + id: "js.crypto.hardcoded_key", + description: "Hardcoded cryptographic key/secret in source config", + query: r#"(pair + key: (property_identifier) @key + (#match? @key "(?i)^([a-z0-9]+secret|(crypto|cookie|session|signing|encryption|encrypt|private|master|jwt|hmac|secret)key|api[_-]?key|access[_-]?key|secret[_-]?key|private[_-]?key|encryption[_-]?key|signing[_-]?key)$") + value: (string) @val (#match? @val "[^\"']{3,}")) + @vuln"#, + severity: Severity::Low, + tier: PatternTier::A, + category: PatternCategory::Crypto, + confidence: Confidence::Medium, + }, // ── Tier A: Open redirect ────────────────────────────────────────── Pattern { id: "js.xss.location_assign", diff --git a/src/patterns/typescript.rs b/src/patterns/typescript.rs index b8e13184..5be60cbb 100644 --- a/src/patterns/typescript.rs +++ b/src/patterns/typescript.rs @@ -133,6 +133,22 @@ pub const PATTERNS: &[Pattern] = &[ category: PatternCategory::Secrets, confidence: Confidence::Medium, }, + // ── Tier A: Hardcoded cryptographic key/secret config ────────────── + // Crypto-key-shaped keys the anchored `hardcoded_secret` regex misses; + // emits a `crypto`-bucketing rule id. See javascript.rs for rationale. + Pattern { + id: "ts.crypto.hardcoded_key", + description: "Hardcoded cryptographic key/secret in source config", + query: r#"(pair + key: (property_identifier) @key + (#match? @key "(?i)^([a-z0-9]+secret|(crypto|cookie|session|signing|encryption|encrypt|private|master|jwt|hmac|secret)key|api[_-]?key|access[_-]?key|secret[_-]?key|private[_-]?key|encryption[_-]?key|signing[_-]?key)$") + value: (string) @val (#match? @val "[^\"']{3,}")) + @vuln"#, + severity: Severity::Low, + tier: PatternTier::A, + category: PatternCategory::Crypto, + confidence: Confidence::Medium, + }, // ── Tier A: TypeScript-specific type-safety escapes ──────────────── Pattern { id: "ts.quality.any_annotation", diff --git a/src/taint/ssa_transfer/mod.rs b/src/taint/ssa_transfer/mod.rs index 8daa5721..5c741c87 100644 --- a/src/taint/ssa_transfer/mod.rs +++ b/src/taint/ssa_transfer/mod.rs @@ -5981,7 +5981,28 @@ pub(super) fn transfer_inst( .split_once('.') .map(|(root, _)| crate::labels::is_js_ts_handler_param_name(root)) .unwrap_or(false); - if crate::labels::is_js_ts_handler_param_name(var_name) || root_is_handler { + // Destructured Express request param (`({ query }, res) => + // …`): `query` lowers as a bare `Param`, so the textual + // `req.query` source label never matches. Seed it only when + // a sibling response param is present (the route-handler + // signal), so a plain `paginate(query)` stays un-seeded. + let is_destructured_request_field = + crate::labels::is_express_request_field_name(var_name) && { + let eb = &ssa.blocks[ssa.entry.0 as usize]; + eb.phis.iter().chain(eb.body.iter()).any(|i| { + matches!(i.op, SsaOp::Param { .. }) + && ssa + .value_defs + .get(i.value.0 as usize) + .and_then(|vd| vd.var_name.as_deref()) + .map(crate::labels::is_handler_response_param_name) + .unwrap_or(false) + }) + }; + if crate::labels::is_js_ts_handler_param_name(var_name) + || root_is_handler + || is_destructured_request_field + { let origin = TaintOrigin { node: inst.cfg_node, source_kind: SourceKind::UserInput,