mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
Improved path traversal detection and enhanced sink classification logic
This commit is contained in:
parent
58f1794a4e
commit
3c89bddbf2
56 changed files with 3989 additions and 345 deletions
|
|
@ -207,6 +207,34 @@ impl PathFact {
|
|||
!self.is_bottom && self.dotdot == Tri::No && self.absolute == Tri::No
|
||||
}
|
||||
|
||||
/// True iff the fact proves the path stays inside a trusted region
|
||||
/// for path-traversal purposes (the FILE_IO sink-suppression
|
||||
/// predicate).
|
||||
///
|
||||
/// Accepts either of two structural invariants:
|
||||
///
|
||||
/// * `dotdot = No && absolute = No` — the relative-and-`..`-free
|
||||
/// shape recognised by [`is_path_safe`]. Cannot escape to an
|
||||
/// attacker-controlled absolute location.
|
||||
/// * `dotdot = No && prefix_lock.is_some()` — a canonicalised path
|
||||
/// (typically `File.expand_path` / `realpath` / `fs::canonicalize`)
|
||||
/// that has been verified-rooted by a `starts_with`-style guard
|
||||
/// against some prefix. The prefix may be opaque
|
||||
/// ([`OPAQUE_PREFIX_LOCK`]); the structural guarantee is the same:
|
||||
/// the path is provably inside the locked subtree.
|
||||
///
|
||||
/// This relaxation closes the rswag CVE-2023-38337 patched-counterpart
|
||||
/// FP shape (`File.expand_path(File.join(root, p)) + start_with? root`)
|
||||
/// and the equivalent Python (`os.path.realpath + .startswith(root)`)
|
||||
/// and JS (`path.resolve + .startsWith(root)`) idioms, all of which
|
||||
/// produce absolute paths but are sound against `..` traversal.
|
||||
pub fn is_path_traversal_safe(&self) -> bool {
|
||||
if self.is_bottom || self.dotdot != Tri::No {
|
||||
return false;
|
||||
}
|
||||
self.absolute == Tri::No || self.prefix_lock.is_some()
|
||||
}
|
||||
|
||||
/// True iff the fact has a prefix lock equal to or contained under
|
||||
/// `root`. Used by sink-suppression to confirm that a path derived
|
||||
/// from a locked root is provably still under that root.
|
||||
|
|
@ -391,6 +419,16 @@ pub enum PathAssertion {
|
|||
None,
|
||||
}
|
||||
|
||||
/// Sentinel root attached to a [`PathFact::prefix_lock`] when the
|
||||
/// `starts_with`-style guard's argument is non-literal (a method call,
|
||||
/// field access, configured root from the application). The structural
|
||||
/// invariant — "verified rooted under SOME prefix" — is what the sink-
|
||||
/// suppression layer needs; the *exact* prefix bytes are not. Combined
|
||||
/// with a `dotdot=No` proof from canonicalisation or `..`-rejection, an
|
||||
/// opaque prefix-lock is sufficient to prove the path stays inside a
|
||||
/// trusted region.
|
||||
pub const OPAQUE_PREFIX_LOCK: &str = "__nyx_opaque_prefix__";
|
||||
|
||||
/// Recognise a Rust path-rejection branch idiom from the raw condition text.
|
||||
///
|
||||
/// Accepts both atomic conditions (`x.contains("..")`) and multi-clause
|
||||
|
|
@ -449,6 +487,22 @@ pub fn classify_path_rejection_axes(text: &str) -> smallvec::SmallVec<[PathRejec
|
|||
out
|
||||
}
|
||||
|
||||
/// True iff any top-level OR clause of `text` is the pre-negated
|
||||
/// `!filepath.IsLocal(<expr>)` Go idiom — i.e. a clause whose `!` is
|
||||
/// already consumed by [`classify_path_rejection_axes`] when reporting
|
||||
/// the safe arm. Callers use this to decide whether AST-level negation
|
||||
/// (`condition_negated`) was already accounted for by the classifier
|
||||
/// (returns `true`) or still needs to flip the safe-arm polarity for
|
||||
/// polarity-blind atoms like `!path.contains("..")` (returns `false`).
|
||||
pub(crate) fn cond_has_pre_negated_islocal_clause(text: &str) -> bool {
|
||||
for clause in split_top_level_or(text) {
|
||||
if has_negated_filepath_is_local(clause.trim()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Detect `!filepath.IsLocal(<expr>)`, Go's idiomatic path-traversal
|
||||
/// guard. Whitespace-tolerant: `! filepath.IsLocal(`, `!filepath . IsLocal(`,
|
||||
/// etc. Used by [`classify_path_rejection_axes`] to inject both
|
||||
|
|
@ -651,19 +705,39 @@ fn split_top_level_or(text: &str) -> smallvec::SmallVec<[&str; 4]> {
|
|||
out
|
||||
}
|
||||
|
||||
/// Recognise a Rust path-positive-assertion branch idiom.
|
||||
/// Recognise a path-positive-assertion branch idiom (language-agnostic).
|
||||
///
|
||||
/// Returns:
|
||||
///
|
||||
/// * `PrefixLock(<literal>)` when the condition is a `starts_with`-style
|
||||
/// call with a literal prefix of length ≥ 2. Sibling single-character
|
||||
/// prefixes (`"/"`, `"\\"`) are absolute-axis rejections, not locks.
|
||||
/// * `PrefixLock(`[`OPAQUE_PREFIX_LOCK`]`)` when the call has a
|
||||
/// non-empty, *non-literal* argument (method call, field access, local
|
||||
/// variable). The opaque marker certifies the structural invariant
|
||||
/// "verified rooted under some prefix" without committing to bytes,
|
||||
/// which is exactly what FILE_IO sink-suppression needs to combine with
|
||||
/// a `dotdot=No` proof — the upstream code path
|
||||
/// `File.expand_path(...) + start_with?(<config_root>)` is the
|
||||
/// motivating example.
|
||||
/// * `None` otherwise.
|
||||
pub fn classify_path_assertion(text: &str) -> PathAssertion {
|
||||
let trimmed = text.trim();
|
||||
if let Some(needle) = extract_starts_with_arg(trimmed) {
|
||||
// Positive assertion: a literal-prefix `starts_with` on a locked
|
||||
// root. Sibling slash ("/") and backslash ("\\") are also
|
||||
// classified as rejections above; prefix-lock only fires when the
|
||||
// prefix is multi-character (i.e. carries real locking info).
|
||||
if needle.len() >= 2 {
|
||||
return PathAssertion::PrefixLock(needle);
|
||||
match extract_starts_with_arg(trimmed) {
|
||||
Some(needle) if needle.len() >= 2 => PathAssertion::PrefixLock(needle),
|
||||
// Single-char literal (`"/"`, `"\\"`) is an absolute-axis
|
||||
// rejection idiom handled by `classify_path_rejection_axes`, not
|
||||
// a positive prefix-lock — fall through to None.
|
||||
Some(_) => PathAssertion::None,
|
||||
// No literal recovered: check for a non-literal argument
|
||||
// (method call, field access, configured root) and attach the
|
||||
// opaque marker so the structural "verified rooted under SOME
|
||||
// prefix" invariant is recorded for downstream sink suppression.
|
||||
None if has_starts_with_call_with_nonempty_arg(trimmed) => {
|
||||
PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string())
|
||||
}
|
||||
None => PathAssertion::None,
|
||||
}
|
||||
PathAssertion::None
|
||||
}
|
||||
|
||||
/// Recognise a *structural* one-argument enum-variant constructor.
|
||||
|
|
@ -1136,6 +1210,69 @@ fn extract_starts_with_arg(text: &str) -> Option<String> {
|
|||
None
|
||||
}
|
||||
|
||||
/// Detect a `starts_with`-style call with a non-empty argument, where the
|
||||
/// argument is *not* recovered as a string literal by
|
||||
/// [`extract_starts_with_arg`] (so it's a method call, field access, local
|
||||
/// variable, etc.). Used by [`classify_path_assertion`] to attach an
|
||||
/// opaque prefix-lock when the application validates with a configured
|
||||
/// root rather than an inline string literal.
|
||||
///
|
||||
/// Whitespace-tolerant. Conservative: returns `false` for any shape where
|
||||
/// the argument cannot be confirmed non-empty.
|
||||
fn has_starts_with_call_with_nonempty_arg(text: &str) -> bool {
|
||||
// Method-call forms with parens. The argument-presence check is
|
||||
// simple: after the opening `(`, the first non-whitespace byte must
|
||||
// not be `)` (empty arg list).
|
||||
for method in [
|
||||
".starts_with(",
|
||||
".start_with?(",
|
||||
".startsWith(",
|
||||
".startswith(",
|
||||
] {
|
||||
if let Some(idx) = text.find(method) {
|
||||
let after = &text[idx + method.len()..];
|
||||
if first_non_ws_byte(after).is_some_and(|b| b != b')') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Ruby paren-less call: `r.start_with? <expr>`. Tree-sitter still
|
||||
// serialises the source text verbatim, so a space (or tab) follows
|
||||
// the `?`. Require a non-empty, non-clause-terminator token after.
|
||||
if let Some(idx) = text.find(".start_with?") {
|
||||
let rest = &text[idx + ".start_with?".len()..];
|
||||
// Skip the `(` form (already covered above) and any whitespace.
|
||||
let after = rest.trim_start();
|
||||
if !after.is_empty() {
|
||||
let first = after.as_bytes()[0];
|
||||
// `(` belongs to the parenthesised form; clause terminators
|
||||
// (`&&` / `||` / `)` / `]` / `;` / `,`) mean the call has no
|
||||
// arguments at this position.
|
||||
if !matches!(first, b'(' | b'&' | b'|' | b')' | b']' | b';' | b',') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Go free-function form `strings.HasPrefix(<recv>, <prefix>)`. The
|
||||
// second argument must exist and be non-empty.
|
||||
if let Some(idx) = text.find("strings.HasPrefix(") {
|
||||
let inner = &text[idx + "strings.HasPrefix(".len()..];
|
||||
if let Some(comma_idx) = top_level_comma(inner) {
|
||||
let after_comma = inner[comma_idx + 1..].trim_start();
|
||||
if !after_comma.is_empty() && !after_comma.starts_with(')') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Return the first non-whitespace byte of `text`, or `None` if the slice
|
||||
/// is empty or all-whitespace.
|
||||
fn first_non_ws_byte(text: &str) -> Option<u8> {
    // Byte-level scan: only ASCII whitespace is skipped, so the lead byte
    // of a multi-byte character counts as "non-whitespace".
    let raw = text.as_bytes();
    raw.iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .map(|at| raw[at])
}
|
||||
|
||||
/// Find the index of the first top-level `,` in a slice (depth 0, ignoring
|
||||
/// commas inside nested parentheses, brackets, braces, or string literals).
|
||||
/// Returns `None` if no top-level comma is present.
|
||||
|
|
@ -1716,6 +1853,109 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn assertion_opaque_prefix_lock_method_call_arg() {
    // rswag CVE-2023-38337 patched shape: paren-less `start_with?` whose
    // argument is a configured-root method call. The prefix bytes are not
    // known to the analyser, so the classifier is expected to report the
    // opaque marker rather than a literal prefix.
    let cond = "filename.start_with? @config.resolve_swagger_root(env)";
    let verdict = classify_path_assertion(cond);
    assert_eq!(
        verdict,
        PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string())
    );
}
|
||||
|
||||
#[test]
fn assertion_opaque_prefix_lock_paren_method_call() {
    // Parenthesised variant of the Ruby shape: `r.start_with?(some_root)`.
    let verdict = classify_path_assertion("filename.start_with?(@config.root)");
    assert_eq!(
        verdict,
        PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string())
    );
}
|
||||
|
||||
#[test]
fn assertion_opaque_prefix_lock_python_startswith() {
    // Python idiom `os.path.realpath(p).startswith(safe_root)`: the
    // argument `safe_root` is a local variable rather than a literal.
    let verdict = classify_path_assertion("p.startswith(safe_root)");
    assert_eq!(
        verdict,
        PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string())
    );
}
|
||||
|
||||
#[test]
fn assertion_opaque_prefix_lock_js_starts_with() {
    // JavaScript spelling (`startsWith`) with an identifier argument.
    let verdict = classify_path_assertion("resolved.startsWith(uploadsDir)");
    assert_eq!(
        verdict,
        PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string())
    );
}
|
||||
|
||||
#[test]
fn assertion_opaque_prefix_lock_go_hasprefix() {
    // Go free-function form: the prefix is the second argument.
    let verdict = classify_path_assertion("strings.HasPrefix(p, safeRoot)");
    assert_eq!(
        verdict,
        PathAssertion::PrefixLock(OPAQUE_PREFIX_LOCK.to_string())
    );
}
|
||||
|
||||
#[test]
fn assertion_no_lock_on_empty_arg() {
    // Degenerate `r.starts_with()` with an empty argument list must not
    // yield any prefix lock.
    let verdict = classify_path_assertion("r.starts_with()");
    assert_eq!(verdict, PathAssertion::None);
}
|
||||
|
||||
#[test]
fn is_path_traversal_safe_relative_dotdot_free() {
    // Both the `..` axis and the absolute axis cleared, no prefix lock:
    // the relaxed predicate accepts.
    let cleared = PathFact::default();
    let cleared = cleared.with_dotdot_cleared();
    let cleared = cleared.with_absolute_cleared();
    assert!(cleared.is_path_traversal_safe());
}
|
||||
|
||||
#[test]
fn is_path_traversal_safe_canonicalised_with_prefix_lock() {
    // `File.expand_path + start_with?(root)` shape: dotdot=No,
    // absolute=Yes, prefix_lock=Some. The relaxed predicate should
    // accept this even though the strict `is_path_safe` rejects it.
    //
    // The lock uses the shared sentinel constant rather than a copy of its
    // bytes, so this test keeps tracking the marker if it is ever renamed.
    let locked = PathFact::default()
        .with_dotdot_cleared()
        .with_prefix_lock(OPAQUE_PREFIX_LOCK);
    assert!(
        !locked.is_path_safe(),
        "absolute axis still Maybe blocks strict"
    );
    // Setting absolute=Yes via expand_path-style transfer:
    let mut canonical = locked.clone();
    canonical.absolute = Tri::Yes;
    assert!(
        !canonical.is_path_safe(),
        "absolute=Yes blocks strict predicate"
    );
    assert!(
        canonical.is_path_traversal_safe(),
        "prefix_lock + dotdot=No is sufficient under relaxed predicate"
    );
}
|
||||
|
||||
#[test]
fn is_path_traversal_safe_rejects_dotdot_maybe() {
    // A prefix lock alone is not enough: the `..` axis was never cleared
    // on this fact, so the relaxed predicate must still reject.
    let lock_only = PathFact::default().with_prefix_lock("/var/app/");
    let accepted = lock_only.is_path_traversal_safe();
    assert!(!accepted);
}
|
||||
|
||||
#[test]
fn is_path_traversal_safe_rejects_absolute_without_lock() {
    // `..` axis cleared but the path is absolute and carries no prefix
    // lock — the relaxed predicate must reject.
    let mut unanchored = PathFact::default().with_dotdot_cleared();
    unanchored.absolute = Tri::Yes;
    let accepted = unanchored.is_path_traversal_safe();
    assert!(!accepted);
}
|
||||
|
||||
#[test]
fn is_path_traversal_safe_rejects_bottom() {
    // The bottom fact is never considered safe.
    let bottom = PathFact::bottom();
    assert!(!bottom.is_path_traversal_safe());
}
|
||||
|
||||
#[test]
|
||||
fn primitive_canonicalize_normalises() {
|
||||
let f = classify_path_primitive("fs::canonicalize", &PathFact::top()).unwrap();
|
||||
|
|
|
|||
1267
src/ast.rs
1267
src/ast.rs
File diff suppressed because it is too large
Load diff
|
|
@ -85,6 +85,77 @@ fn inner_call_override_narrows_classification_span() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Ruby (and any language without an `expression_statement` wrapper)
|
||||
/// reaches `push_node` with `ast.kind() == "call"` (`Kind::CallMethod`)
|
||||
/// for top-level statement-position calls. The inner-call fallback at
|
||||
/// `push_node` line ~1690 must include `Kind::CallFn | Kind::CallMethod
|
||||
/// | Kind::CallMacro` in its kind gate, otherwise an unclassified outer
|
||||
/// wrapper around a sink (e.g. `YAML.safe_load(File.read(filename))`,
|
||||
/// `String.new(File.read(x))`, `JSON.parse(File.read(x))` — every
|
||||
/// chain-style sink wrapper used in real Ruby helpers) loses the inner
|
||||
/// sink's classification entirely. Cross-function summary extraction
|
||||
/// then misses the wrapper's `param_to_sink` and downstream callers
|
||||
/// silently lose detection. Regression guard for CVE-2023-38337
|
||||
/// (rswag-api `parse_file → load_yaml/load_json → File.read` chain)
|
||||
/// and CVE-2021-21288 (CarrierWave `download → OpenURI.open_uri`).
|
||||
#[test]
fn ruby_inner_call_fallback_classifies_wrapper_around_file_read() {
    let ruby = Language::from(tree_sitter_ruby::LANGUAGE);
    let src = b"def f(x)\n YAML.safe_load(File.read(x))\nend\n";
    let (cfg, _entry) = parse_and_build(src, "ruby", ruby);

    // The outer call `YAML.safe_load(...)` does not classify by itself;
    // the test expects some CFG node whose callee has been rewritten to the
    // inner `File.read`.
    let mut file_read_nodes = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].call.callee.as_deref() == Some("File.read"));
    let sink = file_read_nodes.next().expect(
        "inner-call fallback should override the outer YAML.safe_load callee with File.read",
    );
    let info = &cfg[sink];

    // That node must carry a Sink label whose caps include FILE_IO.
    let carries_file_io = info.taint.labels.iter().any(|label| match label {
        DataLabel::Sink(caps) => caps.contains(crate::labels::Cap::FILE_IO),
        _ => false,
    });
    assert!(
        carries_file_io,
        "wrapper-around-File.read node must carry the FILE_IO sink label"
    );

    // outer_callee should preserve the original callee text so cross-fn
    // summary lookup can still find the wrapping function.
    let outer = info.call.outer_callee.as_deref();
    assert_eq!(
        outer,
        Some("YAML.safe_load"),
        "outer_callee must preserve the original wrapping callee"
    );
}
|
||||
|
||||
/// Identical-shape regression guard for the *bare-function* call
|
||||
/// variant (`outer(File.read(x))`) — exercises the `Kind::CallFn`
|
||||
/// branch of the gate, where Ruby/Python/etc.'s top-level free
|
||||
/// function calls lacking a method receiver land.
|
||||
#[test]
fn ruby_inner_call_fallback_classifies_bare_outer_around_file_read() {
    let ruby = Language::from(tree_sitter_ruby::LANGUAGE);
    let src = b"def f(x)\n outer(File.read(x))\nend\n";
    let (cfg, _entry) = parse_and_build(src, "ruby", ruby);

    // Receiver-less wrapper `outer(...)`: a node whose callee is the inner
    // `File.read` is still expected to exist.
    let mut file_read_nodes = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].call.callee.as_deref() == Some("File.read"));
    let sink = file_read_nodes
        .next()
        .expect("inner-call fallback must override `outer` callee with File.read");
    let info = &cfg[sink];

    // The node must carry a Sink label whose caps include FILE_IO.
    let carries_file_io = info.taint.labels.iter().any(|label| match label {
        DataLabel::Sink(caps) => caps.contains(crate::labels::Cap::FILE_IO),
        _ => false,
    });
    assert!(
        carries_file_io,
        "wrapper-around-File.read node must carry FILE_IO sink label"
    );
}
|
||||
|
||||
/// `classification_span()` must fall back to `ast.span` when no narrower
|
||||
/// sub-expression was recorded, so existing structural code paths keep
|
||||
/// working unchanged for nodes whose classification applies to the whole
|
||||
|
|
|
|||
|
|
@ -1681,12 +1681,31 @@ pub(super) fn push_node<'a>(
|
|||
// When the callee is overridden, save the original for container ops
|
||||
// (e.g. `parts.add(req.getParameter(...))`, callee becomes
|
||||
// "req.getParameter" but outer_callee preserves "parts.add").
|
||||
//
|
||||
// Statement-level calls in languages without a separate
|
||||
// `expression_statement` wrapper (Ruby, where `body_statement` directly
|
||||
// contains the call AST node) reach `push_node` with `ast.kind() ==
|
||||
// "call"` (`Kind::CallMethod`) rather than `Kind::CallWrapper`. Without
|
||||
// including the call kinds in the gate, an unclassified outer wrapper
|
||||
// around a sink (e.g. `YAML.safe_load(File.read(filename))` or
|
||||
// `String.new(File.read(x))`) loses the inner sink's classification
|
||||
// entirely — the outer call becomes a non-sink node, and the inner call
|
||||
// is not emitted as a standalone CFG node because it sits inside the
|
||||
// outer's `argument_list`. Cross-function summary extraction then
|
||||
// misses the `param_to_sink` for the wrapper helper, breaking detection
|
||||
// of every chain-style sink wrapper used in real Ruby CVEs (rswag
|
||||
// CVE-2023-38337, the Marshal/JSON/YAML-of-File.read pattern, etc.).
|
||||
let mut outer_callee: Option<String> = None;
|
||||
let mut inner_callee_span: Option<(usize, usize)> = None;
|
||||
if labels.is_empty()
|
||||
&& matches!(
|
||||
lookup(lang, ast.kind()),
|
||||
Kind::CallWrapper | Kind::Assignment | Kind::Return
|
||||
Kind::CallWrapper
|
||||
| Kind::Assignment
|
||||
| Kind::Return
|
||||
| Kind::CallFn
|
||||
| Kind::CallMethod
|
||||
| Kind::CallMacro
|
||||
)
|
||||
&& let Some((inner_text, inner_label, inner_span)) =
|
||||
find_classifiable_inner_call(ast, lang, code, extra)
|
||||
|
|
|
|||
|
|
@ -576,6 +576,7 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
|| cl.contains("form")
|
||||
|| cl.contains("query")
|
||||
|| cl.contains("params")
|
||||
|| cl.contains("param")
|
||||
|| cl.contains("input")
|
||||
|| cl.contains("body")
|
||||
|| cl.contains("location")
|
||||
|
|
@ -1691,6 +1692,16 @@ mod tests {
|
|||
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
|
||||
}
|
||||
|
||||
#[test]
fn classify_ruby_openuri_open_uri_is_ssrf_sink() {
    // CVE-2021-21288 (CarrierWave) regression guard: the Ruby callee
    // `OpenURI.open_uri` must classify as an SSRF sink.
    let expected = Some(DataLabel::Sink(Cap::SSRF));
    let actual = classify("ruby", "OpenURI.open_uri", None);
    assert_eq!(actual, expected);
}
|
||||
|
||||
#[test]
|
||||
fn unpack_matcher_strips_exact_sigil() {
|
||||
let (m, exact) = unpack_matcher(b"=open");
|
||||
|
|
|
|||
|
|
@ -127,11 +127,15 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
// URI.open is the network-capable Kernel#open wrapper, more specific than
|
||||
// plain `open` (excluded to avoid file I/O false positives).
|
||||
// OpenURI.open_uri is the canonical low-level URI fetcher that URI.open
|
||||
// delegates to — every SSRF-vulnerable Ruby download helper (CarrierWave
|
||||
// pre-2.1.1 / 1.3.2, Paperclip, etc.) ultimately reaches it.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Net::HTTP.get",
|
||||
"Net::HTTP.post",
|
||||
"URI.open",
|
||||
"OpenURI.open_uri",
|
||||
"HTTParty.get",
|
||||
"HTTParty.post",
|
||||
],
|
||||
|
|
|
|||
|
|
@ -255,6 +255,7 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Hardcoded fallback secret ──────────────────────────────
|
||||
// Empty-string fallback (`|| ""`) is excluded — see typescript.rs for rationale.
|
||||
Pattern {
|
||||
id: "js.secrets.fallback_secret",
|
||||
description: "Environment variable with secret-like name has hardcoded fallback value",
|
||||
|
|
@ -266,7 +267,7 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
property: (property_identifier) @key
|
||||
(#match? @key "(?i)(secret|password|key|token)"))
|
||||
operator: "||"
|
||||
right: (string) @fallback)
|
||||
right: (string) @fallback (#match? @fallback "[^\"']"))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
|
|
|
|||
|
|
@ -244,6 +244,10 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
confidence: Confidence::High,
|
||||
},
|
||||
// ── Tier A: Hardcoded fallback secret ──────────────────────────────
|
||||
// The `(#match? @fallback "[^\"']")` predicate excludes empty-string
|
||||
// fallbacks (`process.env.X || ""`), which are the dominant FP shape
|
||||
// in production TypeScript: developers write `|| ""` to satisfy the
|
||||
// non-undefined string type without committing a real secret.
|
||||
Pattern {
|
||||
id: "ts.secrets.fallback_secret",
|
||||
description: "Environment variable with secret-like name has hardcoded fallback value",
|
||||
|
|
@ -255,7 +259,7 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
property: (property_identifier) @key
|
||||
(#match? @key "(?i)(secret|password|key|token)"))
|
||||
operator: "||"
|
||||
right: (string) @fallback)
|
||||
right: (string) @fallback (#match? @fallback "[^\"']"))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::A,
|
||||
|
|
|
|||
|
|
@ -1037,6 +1037,75 @@ pub fn detect_replace_sanitizer(
|
|||
}
|
||||
}
|
||||
|
||||
/// Detect a call-site Replace sanitizer from syntactic argument literals.
|
||||
///
|
||||
/// Used by SSA transfer to recognize replace-based shell/HTML/SQL escapers
|
||||
/// without requiring a label rule per pattern. Returns the sanitized caps
|
||||
/// when:
|
||||
/// * the callee is a recognized Replace string method (per language),
|
||||
/// * the pattern argument is a concrete string literal, and
|
||||
/// * the pattern matches a security-relevant escape pattern in
|
||||
/// [`detect_replace_sanitizer`].
|
||||
///
|
||||
/// Non-global replaces (e.g. JS `s.replace(";", "")` only replaces the first
|
||||
/// occurrence) are excluded because partial replacement does not provide a
|
||||
/// sanitiser-strength guarantee at the call site.
|
||||
pub fn detect_call_site_replace_sanitizer(
|
||||
callee: &str,
|
||||
lang: Lang,
|
||||
arg_string_literals: &[Option<String>],
|
||||
) -> Option<Cap> {
|
||||
let pattern_pos = pattern_arg_position(callee, lang)?;
|
||||
let pattern = arg_string_literals
|
||||
.get(pattern_pos)
|
||||
.and_then(|o| o.as_deref())?;
|
||||
let replacement = arg_string_literals
|
||||
.get(pattern_pos + 1)
|
||||
.and_then(|o| o.as_deref())
|
||||
.unwrap_or("");
|
||||
let info = detect_replace_sanitizer(pattern, replacement, callee, lang)?;
|
||||
if !info.is_global || info.sanitized_caps.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(info.sanitized_caps)
|
||||
}
|
||||
|
||||
fn pattern_arg_position(callee: &str, lang: Lang) -> Option<usize> {
|
||||
let method = bare_method_name(callee);
|
||||
match lang {
|
||||
Lang::JavaScript | Lang::TypeScript => match method {
|
||||
"replace" | "replaceAll" => Some(0),
|
||||
_ => None,
|
||||
},
|
||||
Lang::Python => match method {
|
||||
"replace" => Some(0),
|
||||
"sub" if callee == "re.sub" => Some(0),
|
||||
_ => None,
|
||||
},
|
||||
Lang::Ruby => match method {
|
||||
"gsub" | "sub" => Some(0),
|
||||
_ => None,
|
||||
},
|
||||
Lang::Java => match method {
|
||||
"replace" | "replaceAll" => Some(0),
|
||||
_ => None,
|
||||
},
|
||||
Lang::Go => match callee {
|
||||
"strings.Replace" | "strings.ReplaceAll" => Some(1),
|
||||
_ => None,
|
||||
},
|
||||
Lang::Php => match callee {
|
||||
"str_replace" => Some(0),
|
||||
_ => None,
|
||||
},
|
||||
Lang::Rust => match method {
|
||||
"replace" | "replacen" => Some(0),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine whether a replace call is global (replaces all occurrences).
|
||||
fn is_global_replace(callee: &str, lang: Lang) -> bool {
|
||||
let method = bare_method_name(callee);
|
||||
|
|
|
|||
|
|
@ -566,6 +566,57 @@ fn count_call_args(text: &str) -> Option<usize> {
|
|||
Some(count)
|
||||
}
|
||||
|
||||
/// Extract the first top-level argument from `args_part`, the substring
|
||||
/// immediately following the open paren of a call expression. Walks
|
||||
/// paren/bracket/brace depth and skips quoted strings so nested calls and
|
||||
/// punctuation inside string literals do not confuse the scan. Returns
|
||||
/// the trimmed argument substring up to the first top-level `,` or
|
||||
/// matching `)`, or `None` when no balanced close paren is found.
|
||||
///
|
||||
/// Robust against trailing wrapper parens such as
|
||||
/// `(!ALLOWED.includes(cmd))` where naïve `strip_suffix(')')` would leave
|
||||
/// `cmd)` and lose the argument.
|
||||
fn first_call_arg(args_part: &str) -> Option<&str> {
    let raw = args_part.as_bytes();
    // The caller has already consumed the call's opening paren, so the
    // scan starts one level deep.
    let mut nesting: usize = 1;
    let mut comma_at: Option<usize> = None;
    let mut pos: usize = 0;

    // Walk until the closer that balances the implicit opening paren.
    // Running off the end without finding it yields `None`.
    let close_at = loop {
        let byte = *raw.get(pos)?;
        match byte {
            b'(' | b'[' | b'{' => nesting += 1,
            b')' | b']' | b'}' => {
                nesting -= 1;
                if nesting == 0 {
                    break pos;
                }
            }
            b',' => {
                // Only the first comma at the call's own depth matters.
                if nesting == 1 && comma_at.is_none() {
                    comma_at = Some(pos);
                }
            }
            b'"' | b'\'' => {
                // Skip to the matching closing quote, stepping over
                // backslash-escaped bytes so `\"` does not end the string.
                pos += 1;
                while let Some(&inner) = raw.get(pos) {
                    if inner == byte {
                        break;
                    }
                    pos += if inner == b'\\' && pos + 1 < raw.len() { 2 } else { 1 };
                }
            }
            _ => {}
        }
        pos += 1;
    };

    // The argument ends at the first top-level comma, or at the closing
    // paren when the call has a single argument.
    let cut = comma_at.unwrap_or(close_at);
    Some(args_part[..cut].trim())
}
|
||||
|
||||
/// Extract the validated variable from a condition text.
|
||||
///
|
||||
/// Handles two patterns:
|
||||
|
|
@ -592,11 +643,10 @@ fn extract_validation_target(text: &str) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
// Function call pattern: `func(x, ...)`, extract first argument
|
||||
// Strip closing paren if present
|
||||
let args_inner = args_part.trim_end().strip_suffix(')').unwrap_or(args_part);
|
||||
// Take text up to first comma (first argument)
|
||||
let first_arg = args_inner.split(',').next()?.trim();
|
||||
// Function call pattern: `func(x, ...)`, extract first argument with
|
||||
// balanced-paren scan so trailing wrapper parens (`(validate(x))`) do
|
||||
// not corrupt the argument substring.
|
||||
let first_arg = first_call_arg(args_part)?;
|
||||
|
||||
// Strip reference operators (e.g. `&x` → `x`)
|
||||
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
|
||||
|
|
@ -630,11 +680,11 @@ fn extract_allowlist_target(text: &str) -> Option<String> {
|
|||
if let Some(pos) = lower.find(method) {
|
||||
let args_start = pos + method.len();
|
||||
let args_part = &trimmed[args_start..];
|
||||
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
|
||||
let first_arg = inner.split(',').next()?.trim();
|
||||
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
return Some(first_arg.to_string());
|
||||
if let Some(first_arg) = first_call_arg(args_part) {
|
||||
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
return Some(first_arg.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -643,11 +693,11 @@ fn extract_allowlist_target(text: &str) -> Option<String> {
|
|||
if let Some(pos) = lower.find("in_array(") {
|
||||
let args_start = pos + "in_array(".len();
|
||||
let args_part = &trimmed[args_start..];
|
||||
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
|
||||
let first_arg = inner.split(',').next()?.trim();
|
||||
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
return Some(first_arg.to_string());
|
||||
if let Some(first_arg) = first_call_arg(args_part) {
|
||||
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
return Some(first_arg.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1063,6 +1113,32 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn extract_allowlist_target_negated_paren_wrapper() {
    // The condition text arrives with its surrounding parens, as
    // `(!ALLOWED.includes(cmd))`. Target extraction must see through the
    // trailing wrapper paren and report `cmd`, not `cmd)`.
    let classified = classify_condition_with_target("(!ALLOWED.includes(cmd))");
    assert_eq!(classified.0, PredicateKind::AllowlistCheck);
    assert_eq!(classified.1.as_deref(), Some("cmd"));
}
|
||||
|
||||
#[test]
fn extract_allowlist_target_java_contains_paren_wrapper() {
    // Same wrapper-paren shape, using the `contains` spelling.
    let classified = classify_condition_with_target("(!ALLOWED.contains(cmd))");
    assert_eq!(classified.0, PredicateKind::AllowlistCheck);
    assert_eq!(classified.1.as_deref(), Some("cmd"));
}
|
||||
|
||||
#[test]
fn extract_allowlist_target_in_array_paren_wrapper() {
    // PHP `in_array` form: the `$` sigil is expected to be stripped from
    // the reported target.
    let classified = classify_condition_with_target("(!in_array($cmd, $allowed))");
    assert_eq!(classified.0, PredicateKind::AllowlistCheck);
    assert_eq!(classified.1.as_deref(), Some("cmd"));
}
|
||||
|
||||
// ── TypeCheck classification ──────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -296,16 +296,16 @@ pub fn ssa_events_to_findings(
|
|||
crate::taint::ssa_transfer::state::record_all_validated_span(span);
|
||||
|
||||
// Mirror the path-safety pathway: when the SSA engine has
|
||||
// already proved every tainted input to a privileged
|
||||
// FILE_IO sink passed through validation, publish the sink
|
||||
// span so the state-analysis pass suppresses
|
||||
// `state-unauthed-access` on the same span. Trust here
|
||||
// matches the trust the engine already extends when
|
||||
// dropping the taint flow finding. Scoped to FILE_IO sinks
|
||||
// because that is the only sink class state-unauthed-access
|
||||
// currently fires on; broadening would risk stretching
|
||||
// validator-name heuristics into unrelated finding classes.
|
||||
if event.sink_caps.intersects(Cap::FILE_IO) {
|
||||
// already proved every tainted input to a privileged sink
|
||||
// passed through validation, publish the sink span so the
|
||||
// state-analysis pass suppresses `state-unauthed-access`
|
||||
// on the same span. Trust here matches the trust the
|
||||
// engine already extends when dropping the taint flow
|
||||
// finding. Covers the privileged sink classes
|
||||
// [`is_privileged_sink`] keys on (FILE_IO + SHELL_ESCAPE);
|
||||
// broadening past those would stretch the validator-trust
|
||||
// heuristic into unrelated finding classes.
|
||||
if event.sink_caps.intersects(Cap::FILE_IO | Cap::SHELL_ESCAPE) {
|
||||
crate::taint::ssa_transfer::state::record_path_safe_suppressed_span(span);
|
||||
}
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -987,6 +987,7 @@ fn compute_succ_states(
|
|||
&effective_vars,
|
||||
ssa,
|
||||
Some(transfer.interner),
|
||||
effective_negated,
|
||||
);
|
||||
|
||||
// Validation-call err-check narrowing. When the condition
|
||||
|
|
@ -1522,7 +1523,13 @@ fn resolve_var_to_ssa_value(var_name: &str, ssa: &SsaBody, block: BlockId) -> Op
|
|||
/// variables) and updates its [`PathFact`] according to the classified
|
||||
/// rejection / assertion idiom.
|
||||
///
|
||||
/// Gated on `transfer.lang == Lang::Rust` by the caller.
|
||||
/// `negated` reflects the effective negation of `cond_text`: when true,
|
||||
/// the condition's surface form is `!<cond_text>` (or `not <cond_text>`)
|
||||
/// and the True/False successor states correspond to the *rejection* /
|
||||
/// *surviving* arms inverted relative to the unwrapped condition. The
|
||||
/// narrowing functions are written against the unwrapped condition; this
|
||||
/// flag lets the caller route prefix-lock / rejection-axis narrowing to
|
||||
/// the arm where the unwrapped condition holds.
|
||||
#[cfg(test)]
|
||||
fn apply_path_fact_branch_narrowing(
|
||||
true_state: &mut SsaTaintState,
|
||||
|
|
@ -1538,6 +1545,7 @@ fn apply_path_fact_branch_narrowing(
|
|||
effective_vars,
|
||||
ssa,
|
||||
None,
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1548,10 +1556,12 @@ fn apply_path_fact_branch_narrowing_with_interner(
|
|||
effective_vars: &[String],
|
||||
ssa: &SsaBody,
|
||||
interner: Option<&SymbolInterner>,
|
||||
negated: bool,
|
||||
) {
|
||||
use crate::abstract_interp::PathFact;
|
||||
use crate::abstract_interp::path_domain::{
|
||||
PathAssertion, PathRejection, classify_path_assertion, classify_path_rejection_axes,
|
||||
cond_has_pre_negated_islocal_clause,
|
||||
};
|
||||
|
||||
let rejection_axes = classify_path_rejection_axes(cond_text);
|
||||
|
|
@ -1561,24 +1571,44 @@ fn apply_path_fact_branch_narrowing_with_interner(
|
|||
return;
|
||||
}
|
||||
|
||||
// Mark validated_may on the false branch when a path-rejection
|
||||
// Resolve the "safe arm" for the rejection axes.
|
||||
//
|
||||
// `classify_path_rejection_axes` reports axes that hold on the FALSE
|
||||
// branch of `cond_text` AS WRITTEN, with one exception: the
|
||||
// `!filepath.IsLocal(...)` Go idiom is matched at the clause level
|
||||
// and the classifier consumes the leading `!` itself (the safe arm
|
||||
// remains the FALSE branch of the whole condition).
|
||||
//
|
||||
// For polarity-blind atoms like `!path.contains("..")`, the
|
||||
// classifier ignores the leading `!` and still extracts `..`. In
|
||||
// that shape, AST detects the unary `!` and sets
|
||||
// `condition_negated = true`, but the rejection axis's *true* safe
|
||||
// arm is the TRUE branch of the whole condition. So when
|
||||
// `negated == true` AND no clause is the pre-negated IsLocal idiom,
|
||||
// flip the narrow target.
|
||||
let rejection_pre_negated = cond_has_pre_negated_islocal_clause(cond_text);
|
||||
let rejection_safe_is_true = negated && !rejection_pre_negated;
|
||||
|
||||
// Mark validated_may / validated_must on the safe arm when a path-rejection
|
||||
// pattern fires. Mirrors the AllowlistCheck quirk that already
|
||||
// marks validated on the rejection-arm via `apply_branch_predicates`
|
||||
// for languages whose `.contains(...)` / membership idiom hits the
|
||||
// AllowlistCheck classifier, but normalises behaviour for shapes
|
||||
// like C `strstr(path, "..") != NULL` that hit the NullCheck arm
|
||||
// first and never get a chance to mark validation through the
|
||||
// allowlist path. Once the path-rejection classifier has accepted
|
||||
// the condition, the false branch (where the sink is reached after
|
||||
// the rejection-arm terminates) is the validated arm by
|
||||
// construction.
|
||||
// allowlist path.
|
||||
if !rejection_axes.is_empty()
|
||||
&& let Some(intern) = interner
|
||||
{
|
||||
let safe_state: &mut SsaTaintState = if rejection_safe_is_true {
|
||||
&mut *true_state
|
||||
} else {
|
||||
&mut *false_state
|
||||
};
|
||||
for var in effective_vars {
|
||||
if let Some(sym) = intern.get(var) {
|
||||
false_state.validated_may.insert(sym);
|
||||
false_state.validated_must.insert(sym);
|
||||
safe_state.validated_may.insert(sym);
|
||||
safe_state.validated_must.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1632,15 +1662,47 @@ fn apply_path_fact_branch_narrowing_with_interner(
|
|||
}
|
||||
};
|
||||
|
||||
// Apply rejection axes to the safe arm. The rejection classifier
|
||||
// (`has_negated_filepath_is_local` + `classify_path_rejection_atom`)
|
||||
// reports axes that hold on the FALSE branch of `cond_text` AS
|
||||
// WRITTEN, with one exception: the `!filepath.IsLocal(...)` Go idiom
|
||||
// is matched at the clause level and the classifier consumes the
|
||||
// leading `!` itself (safe arm remains the FALSE branch).
|
||||
//
|
||||
// For polarity-blind atoms like `!path.contains("..")` the classifier
|
||||
// ignores the leading `!` but AST-level negation flips the safe arm
|
||||
// to TRUE. Use the same `rejection_safe_is_true` resolution as the
|
||||
// validated-marker block above so soundness is consistent.
|
||||
let rejection_state: &mut SsaTaintState = if rejection_safe_is_true {
|
||||
&mut *true_state
|
||||
} else {
|
||||
&mut *false_state
|
||||
};
|
||||
for v in &targets {
|
||||
if let Some(ref mut abs) = false_state.abstract_state {
|
||||
if let Some(ref mut abs) = rejection_state.abstract_state {
|
||||
let mut av = abs.get(*v);
|
||||
narrow_false(&mut av.path);
|
||||
if !av.is_top() {
|
||||
abs.set(*v, av);
|
||||
}
|
||||
}
|
||||
if let Some(ref mut abs) = true_state.abstract_state {
|
||||
}
|
||||
|
||||
// Apply prefix-lock assertion to the cond-holds branch. Unlike the
|
||||
// rejection classifier, `classify_path_assertion` is naive about
|
||||
// leading negation — it just searches cond_text for a
|
||||
// `starts_with`-like substring. When `condition_negated` is true
|
||||
// (e.g. `if !target.startsWith(ROOT) { return; }`) the assertion
|
||||
// actually holds on the *false* CFG edge, where the sink is reached.
|
||||
// Flip the destination state in that case so the lock attaches to
|
||||
// the surviving block.
|
||||
let assertion_state = if negated {
|
||||
&mut *false_state
|
||||
} else {
|
||||
&mut *true_state
|
||||
};
|
||||
for v in &targets {
|
||||
if let Some(ref mut abs) = assertion_state.abstract_state {
|
||||
let mut av = abs.get(*v);
|
||||
narrow_true(&mut av.path);
|
||||
if !av.is_top() {
|
||||
|
|
@ -3024,6 +3086,80 @@ pub(super) fn transfer_inst(
|
|||
return;
|
||||
}
|
||||
|
||||
// Chain-wrapper sanitiser detection. Computed up-front so
|
||||
// both the container-element-write hook and the outer-
|
||||
// callee taint suppression block below can consult it.
|
||||
// Walks `info.arg_callees` for the chain shape
|
||||
// `outer(... wrapper(<source>) ...)`, collecting any
|
||||
// sanitiser caps the wrapper's summary or label exposes.
|
||||
// The set is empty when there is no chain wrapper or when
|
||||
// none of the wrappers expose sanitisation.
|
||||
//
|
||||
// Argument attribution: when `find_classifiable_inner_call`
|
||||
// overrode the callee to an inner Source, the source can be
|
||||
// either (a) a direct argument call (`outer(escape(x),
|
||||
// source())`) or (b) nested inside one wrapper
|
||||
// (`outer(escape(source(x)))`). Crediting any wrapper's
|
||||
// sanitizer caps when the source sits in a different argument
|
||||
// position would suppress real taint flow.
|
||||
//
|
||||
// * `source_arg_pos = Some(N)` — the source call is the
|
||||
// immediate callee of arg N (`arg_callees[N] == callee`).
|
||||
// No other-arg wrapper can sanitize it. Credit nothing.
|
||||
// * `source_arg_pos = None` — the source is nested inside
|
||||
// some arg's wrapper. Credit only when exactly one arg
|
||||
// has a sanitizing wrapper, since that one must be the
|
||||
// parent of the nested source. Multiple sanitizing
|
||||
// wrappers across different positions is ambiguous; stay
|
||||
// conservative and credit nothing.
|
||||
let caller_func_for_chain = info.ast.enclosing_func.as_deref().unwrap_or("");
|
||||
let mut chain_wrapper_sanitizer_caps = Cap::empty();
|
||||
if !info.arg_callees.is_empty() {
|
||||
let source_arg_pos = info
|
||||
.arg_callees
|
||||
.iter()
|
||||
.position(|c| c.as_deref() == Some(callee.as_str()));
|
||||
let mut per_arg_sanitizer_caps: SmallVec<[Cap; 4]> = SmallVec::new();
|
||||
for (idx, maybe_callee) in info.arg_callees.iter().enumerate() {
|
||||
if Some(idx) == source_arg_pos {
|
||||
continue;
|
||||
}
|
||||
let Some(wrap_callee) = maybe_callee else {
|
||||
continue;
|
||||
};
|
||||
if Some(wrap_callee.as_str()) == info.call.outer_callee.as_deref() {
|
||||
continue;
|
||||
}
|
||||
let mut caps_here = Cap::empty();
|
||||
if let Some(resolved) = resolve_callee_hinted(
|
||||
transfer,
|
||||
wrap_callee,
|
||||
caller_func_for_chain,
|
||||
info.call.call_ordinal,
|
||||
None,
|
||||
) {
|
||||
caps_here |= resolved.sanitizer_caps;
|
||||
} else {
|
||||
let labels = crate::labels::classify_all(
|
||||
transfer.lang.as_str(),
|
||||
wrap_callee,
|
||||
transfer.extra_labels,
|
||||
);
|
||||
for lbl in &labels {
|
||||
if let DataLabel::Sanitizer(bits) = lbl {
|
||||
caps_here |= *bits;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !caps_here.is_empty() {
|
||||
per_arg_sanitizer_caps.push(caps_here);
|
||||
}
|
||||
}
|
||||
if source_arg_pos.is_none() && per_arg_sanitizer_caps.len() == 1 {
|
||||
chain_wrapper_sanitizer_caps = per_arg_sanitizer_caps[0];
|
||||
}
|
||||
}
|
||||
|
||||
// Container element-write hook. Runs before other Call-arm
|
||||
// processing so `try_container_propagation`'s early-return
|
||||
// can't bypass us. Writes only into `(loc, ELEM)` cells on
|
||||
|
|
@ -3033,8 +3169,48 @@ pub(super) fn transfer_inst(
|
|||
// through: cell `must = AND` over args (every writer must be
|
||||
// must-validated), `may = OR` over args. Anonymous SSA temps
|
||||
// contribute `false/false` and break the `must` invariant.
|
||||
if let (Some(pf), Some(rcv)) = (transfer.pointer_facts, *receiver) {
|
||||
if crate::pointer::is_container_write_callee(callee) {
|
||||
//
|
||||
// Two callee shapes:
|
||||
// * Method-style write (`receiver.push(val)`) — `receiver`
|
||||
// channel resolves the container, value args start at
|
||||
// position 0.
|
||||
// * Go `append` builtin (or chain shape with
|
||||
// `outer_callee == "append"`) — no receiver channel,
|
||||
// `args[0]` is the slice itself, value args start at
|
||||
// position 1.
|
||||
if let Some(pf) = transfer.pointer_facts {
|
||||
let go_append_chain = transfer.lang == Lang::Go
|
||||
&& receiver.is_none()
|
||||
&& (callee == "append" || info.call.outer_callee.as_deref() == Some("append"));
|
||||
// For Go append, args[0] is the input slice whose
|
||||
// points-to set may be empty when the slice was just
|
||||
// initialised with a composite literal (`cmds :=
|
||||
// []string{}`). The call result (inst.value) carries
|
||||
// the fresh allocation site that pointer analysis
|
||||
// attaches to every Call op, and downstream uses of
|
||||
// the slice flow through that result, so it is the
|
||||
// authoritative container identity. Fall back to
|
||||
// args[0] when the result has no pt set yet.
|
||||
let resolved_recv: Option<SsaValue> = if let Some(rcv) = *receiver {
|
||||
Some(rcv)
|
||||
} else if go_append_chain {
|
||||
let result_v = inst.value;
|
||||
let result_pt = pf.pt(result_v);
|
||||
if !result_pt.is_empty() && !result_pt.is_top() {
|
||||
Some(result_v)
|
||||
} else {
|
||||
args.first().and_then(|a| a.first().copied())
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let value_arg_start = if go_append_chain { 1 } else { 0 };
|
||||
let write_callee_match = if go_append_chain {
|
||||
true
|
||||
} else {
|
||||
crate::pointer::is_container_write_callee(callee)
|
||||
};
|
||||
if let (Some(rcv), true) = (resolved_recv, write_callee_match) {
|
||||
let pt = pf.pt(rcv);
|
||||
if !pt.is_empty() && !pt.is_top() {
|
||||
let mut elem_caps = Cap::empty();
|
||||
|
|
@ -3043,7 +3219,7 @@ pub(super) fn transfer_inst(
|
|||
let mut elem_must_all = true; // AND over args (vacuously true for empty args)
|
||||
let mut elem_may_any = false; // OR over args
|
||||
let mut saw_any_arg = false;
|
||||
for arg_group in args {
|
||||
for arg_group in args.iter().skip(value_arg_start) {
|
||||
for &arg_v in arg_group {
|
||||
saw_any_arg = true;
|
||||
if let Some(t) = state.get(arg_v) {
|
||||
|
|
@ -3059,6 +3235,35 @@ pub(super) fn transfer_inst(
|
|||
elem_may_any |= av;
|
||||
}
|
||||
}
|
||||
// Chain-shape Go append: the inner Source label
|
||||
// fires on this same call instruction, so its
|
||||
// caps are not yet on any positional arg's SSA
|
||||
// value at this point. Pull them in directly
|
||||
// from the source labels so the W4 cell sees
|
||||
// the real source caps; without this the cell
|
||||
// is empty for the chain shape and the index-
|
||||
// read taint flow appears clean for the wrong
|
||||
// reason.
|
||||
if go_append_chain {
|
||||
for lbl in &info.taint.labels {
|
||||
if let DataLabel::Source(bits) = lbl {
|
||||
elem_caps |= *bits;
|
||||
saw_any_arg = true;
|
||||
}
|
||||
}
|
||||
// A chain-shape sanitising wrapper around the
|
||||
// source counts as the validation that the
|
||||
// ELEM cell needs. Each entry in
|
||||
// `info.arg_callees` whose summary or label
|
||||
// exposes non-empty `sanitizer_caps`
|
||||
// contributes to validation, the cell's
|
||||
// must/may bits flip on so the index-read
|
||||
// counterpart sees the value as validated.
|
||||
if !chain_wrapper_sanitizer_caps.is_empty() {
|
||||
elem_must_all = true;
|
||||
elem_may_any = true;
|
||||
}
|
||||
}
|
||||
// Vacuous AND: a zero-arg container write supplies
|
||||
// no validation source, so coerce must to false.
|
||||
if !saw_any_arg {
|
||||
|
|
@ -3204,6 +3409,20 @@ pub(super) fn transfer_inst(
|
|||
}
|
||||
}
|
||||
|
||||
// Call-site replace sanitizer detection. Recognises
|
||||
// `s.replace*(pat, rep)` / `strings.ReplaceAll(s, pat, rep)` /
|
||||
// `str_replace($pat, $rep, $s)` shapes whose pattern is a
|
||||
// concrete shell/HTML/SQL escape literal and treats the call
|
||||
// as a sanitizer for the corresponding caps. Mirrors the
|
||||
// semantics that label-rule sanitizers already provide.
|
||||
if let Some(extra) = crate::symex::strings::detect_call_site_replace_sanitizer(
|
||||
callee,
|
||||
transfer.lang,
|
||||
&info.call.arg_string_literals,
|
||||
) {
|
||||
sanitizer_bits |= extra;
|
||||
}
|
||||
|
||||
// Resolve callee summary, always attempt, even when explicit
|
||||
// labels are present. Labels take precedence for source caps, but
|
||||
// summary propagation and sanitizer behaviour must still apply
|
||||
|
|
@ -4006,7 +4225,10 @@ pub(super) fn transfer_inst(
|
|||
// produces return_bits. Check if the wrapper function blocks taint:
|
||||
// if its SSA summary shows no propagation, no source_caps, and no
|
||||
// container identity return, the return value is independent of its
|
||||
// arguments, clear return_bits.
|
||||
// arguments, clear return_bits. Additionally apply the wrapper's
|
||||
// sanitizer caps (StripBits transforms) so a sanitising wrapper
|
||||
// like `validate(<source>)` clears the relevant cap bits even
|
||||
// when the wrapper still propagates other taint.
|
||||
if !return_bits.is_empty() && has_source_label {
|
||||
if let Some(ref oc) = info.call.outer_callee {
|
||||
if let Some(ref oc_sum) = resolve_callee_hinted(
|
||||
|
|
@ -4021,11 +4243,36 @@ pub(super) fn transfer_inst(
|
|||
// no internal sources reaching return.
|
||||
return_bits = Cap::empty();
|
||||
return_origins.clear();
|
||||
} else if !oc_sum.sanitizer_caps.is_empty() {
|
||||
return_bits &= !oc_sum.sanitizer_caps;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Chain-wrapper sanitizer suppression: when the chain shape
|
||||
// `outer(... wrapper(<source>) ...)` puts a sanitising wrapper
|
||||
// function between the inner Source and the outer call,
|
||||
// mark the call result's symbol as validated so any
|
||||
// downstream sink event over the same value fires with
|
||||
// `all_validated = true`, suppressing the taint finding and
|
||||
// (via [`record_path_safe_suppressed_span`]) the
|
||||
// `state-unauthed-access` finding on the same span.
|
||||
// `chain_wrapper_sanitizer_caps` is computed up-front above
|
||||
// so the container-element-write hook can also consult it.
|
||||
if has_source_label && !chain_wrapper_sanitizer_caps.is_empty() {
|
||||
if let Some(name) = ssa
|
||||
.value_defs
|
||||
.get(inst.value.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
{
|
||||
if let Some(sym) = transfer.interner.get(name) {
|
||||
state.validated_must.insert(sym);
|
||||
state.validated_may.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Constructor cap narrowing: a `new X(...)` call returns an object
|
||||
// instance, not a string. Caps that name a string-shaped sink
|
||||
// pattern (path argument, format string, URL component, JSON
|
||||
|
|
@ -7654,11 +7901,12 @@ fn is_abstract_safe_for_sink(
|
|||
}
|
||||
|
||||
/// Check every tainted leaf flowing into `inst`'s used values carries a
|
||||
/// PathFact proving it is dotdot-free and non-absolute.
|
||||
/// PathFact proving it cannot perform path traversal.
|
||||
///
|
||||
/// Core gate for the rs-safe-0** FP closure (see [`PathFact::is_path_safe`]).
|
||||
/// Traces through Assign chains so `Path::new(sanitised)` still resolves
|
||||
/// to the sanitised string's fact.
|
||||
/// Core gate for the rs-safe-0** FP closure plus the canonicalised+rooted
|
||||
/// shape (see [`PathFact::is_path_traversal_safe`]). Traces through
|
||||
/// Assign chains so `Path::new(sanitised)` still resolves to the
|
||||
/// sanitised string's fact.
|
||||
fn is_path_safe_for_sink(
|
||||
inst: &SsaInst,
|
||||
state: &SsaTaintState,
|
||||
|
|
@ -7670,7 +7918,9 @@ fn is_path_safe_for_sink(
|
|||
if leaves.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let safe = leaves.iter().all(|v| abs.get(*v).path.is_path_safe());
|
||||
let safe = leaves
|
||||
.iter()
|
||||
.all(|v| abs.get(*v).path.is_path_traversal_safe());
|
||||
if safe {
|
||||
// Publish the suppression to the file-level set so the
|
||||
// state-analysis pass can suppress `state-unauthed-access` on
|
||||
|
|
@ -7925,7 +8175,7 @@ fn trace_single_leaf(
|
|||
// existing trace-through-args behaviour.
|
||||
let proves_path_safe = state.abstract_state.as_ref().is_some_and(|abs_state| {
|
||||
let f = abs_state.get(v).path;
|
||||
!f.is_top() && f.is_path_safe()
|
||||
!f.is_top() && f.is_path_traversal_safe()
|
||||
});
|
||||
if is_source || proves_path_safe {
|
||||
leaves.push(v);
|
||||
|
|
|
|||
|
|
@ -1229,6 +1229,80 @@ mod goto_succ_propagation_tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_fact_negated_contains_dotdot_narrows_true_branch() {
|
||||
// `if !path.contains("..") { return; } sink(path);` — the arm on which
|
||||
// `path` is known to be `..`-free is the TRUE branch of the IF
// condition (here the early-return arm); the FALSE arm, which falls
// through to the sink, may still contain `..`. The
|
||||
// rejection axis (DotDot) must narrow `true_state`, not `false_state`,
|
||||
// otherwise the unsafe arm gets dotdot=No and the sink suppression
|
||||
// masks the bug.
// NOTE(review): the assertion messages below label the TRUE arm
// "sink-reaching"; for the snippet as written the sink is reached on the
// FALSE arm — confirm which shape the messages intend.
|
||||
let ssa = ssa_body_with_named_value("path");
|
||||
let mut true_state = initial_state_with_abstract();
|
||||
let mut false_state = initial_state_with_abstract();
|
||||
|
||||
super::super::apply_path_fact_branch_narrowing_with_interner(
|
||||
&mut true_state,
|
||||
&mut false_state,
|
||||
"!path.contains(\"..\")",
|
||||
&["path".to_string()],
|
||||
&ssa,
|
||||
// interner: none supplied, so only the PathFact narrowing is under test.
None,
|
||||
// negated = true: the condition's surface form carries a leading `!`.
true,
|
||||
);
|
||||
|
||||
let true_abs = true_state.abstract_state.as_ref().unwrap();
|
||||
let false_abs = false_state.abstract_state.as_ref().unwrap();
|
||||
// TRUE arm: `!contains("..")` holds, so the dotdot axis must be `No`.
assert_eq!(
|
||||
true_abs.get(SsaValue(0)).path.dotdot,
|
||||
crate::abstract_interp::Tri::No,
|
||||
"negated-contains: TRUE arm (sink-reaching, safe) must narrow"
|
||||
);
|
||||
// FALSE arm: nothing is proven about `..`, so the axis must stay `Maybe`.
assert_eq!(
|
||||
false_abs.get(SsaValue(0)).path.dotdot,
|
||||
crate::abstract_interp::Tri::Maybe,
|
||||
"negated-contains: FALSE arm (rejection arm) must NOT narrow"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_fact_negated_filepath_islocal_narrows_false_branch() {
|
||||
// `if !filepath.IsLocal(p) { return; } sink(p);` — Go idiom. The
|
||||
// classifier consumes the `!` itself (pre-negated handler), so the
|
||||
// safe arm remains the FALSE branch of the whole condition even
|
||||
// though `condition_negated == true` at AST level.
|
||||
let ssa = ssa_body_with_named_value("p");
|
||||
let mut true_state = initial_state_with_abstract();
|
||||
let mut false_state = initial_state_with_abstract();
|
||||
|
||||
super::super::apply_path_fact_branch_narrowing_with_interner(
|
||||
&mut true_state,
|
||||
&mut false_state,
|
||||
"!filepath.IsLocal(p)",
|
||||
&["p".to_string()],
|
||||
&ssa,
|
||||
// interner: none supplied, so only the PathFact narrowing is under test.
None,
|
||||
// negated = true: the condition's surface form carries a leading `!`;
// the pre-negated IsLocal clause must stop this from flipping the safe arm.
true,
|
||||
);
|
||||
|
||||
let true_abs = true_state.abstract_state.as_ref().unwrap();
|
||||
let false_abs = false_state.abstract_state.as_ref().unwrap();
|
||||
// FALSE arm (IsLocal holds): both the dotdot and absolute axes must
// narrow to `No`.
assert_eq!(
|
||||
false_abs.get(SsaValue(0)).path.dotdot,
|
||||
crate::abstract_interp::Tri::No,
|
||||
"!filepath.IsLocal: FALSE arm (sink-reaching, IsLocal=true) must narrow"
|
||||
);
|
||||
assert_eq!(
|
||||
false_abs.get(SsaValue(0)).path.absolute,
|
||||
crate::abstract_interp::Tri::No,
|
||||
"!filepath.IsLocal: FALSE arm absolute axis must narrow"
|
||||
);
|
||||
// TRUE arm (early return): nothing is proven, so dotdot stays `Maybe`.
assert_eq!(
|
||||
true_abs.get(SsaValue(0)).path.dotdot,
|
||||
crate::abstract_interp::Tri::Maybe,
|
||||
"!filepath.IsLocal: TRUE arm (return) must NOT narrow"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_fact_no_match_leaves_state_untouched() {
|
||||
let ssa = ssa_body_with_named_value("x");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue