mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -115,8 +115,8 @@ pub static PARAM_CONFIG: ParamConfig = ParamConfig {
|
|||
/// Benchmark-driven output-parameter source positions for known C APIs.
|
||||
/// Maps callee name → argument positions that receive Source taint.
|
||||
pub static OUTPUT_PARAM_SOURCES: &[(&str, &[usize])] = &[
|
||||
("fgets", &[0]), // fgets(buf, size, stream) — buf receives input
|
||||
("gets", &[0]), // gets(buf) — buf receives input
|
||||
("fgets", &[0]), // fgets(buf, size, stream), buf receives input
|
||||
("gets", &[0]), // gets(buf), buf receives input
|
||||
("recv", &[1]), // recv(fd, buf, len, flags)
|
||||
("recvfrom", &[1]), // recvfrom(fd, buf, len, flags, ...)
|
||||
];
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
// and extract them as separate bodies. Without these, a
|
||||
// `class_specifier` / `struct_specifier` falls through to the
|
||||
// generic `_ =>` arm in `build_sub`, which records a leaf `Seq`
|
||||
// node and never walks the body — so inline member-function
|
||||
// node and never walks the body, so inline member-function
|
||||
// definitions (and methods of nested classes) are silently dropped.
|
||||
"declaration_list" => Kind::Block,
|
||||
"field_declaration_list" => Kind::Block,
|
||||
|
|
@ -160,7 +160,7 @@ pub static PARAM_CONFIG: ParamConfig = ParamConfig {
|
|||
|
||||
/// Benchmark-driven output-parameter source positions for known C++ APIs.
|
||||
pub static OUTPUT_PARAM_SOURCES: &[(&str, &[usize])] = &[
|
||||
("getline", &[1]), // std::getline(stream, str) — str receives input
|
||||
("getline", &[1]), // std::getline(stream, str), str receives input
|
||||
("std::getline", &[1]),
|
||||
("fgets", &[0]),
|
||||
("gets", &[0]),
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// fmt.Printf/Sprintf write to stdout or build strings in memory — not
|
||||
// fmt.Printf/Sprintf write to stdout or build strings in memory, not
|
||||
// security sinks. fmt.Fprintf writes to an io.Writer (often http.ResponseWriter)
|
||||
// so it IS a security sink for XSS.
|
||||
LabelRule {
|
||||
|
|
@ -110,7 +110,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
// Idiomatic Go SSRF sinks (Owncast CVE-2023-3188) use the
|
||||
// `http.DefaultClient.Get(url)` form rather than the bare
|
||||
// `http.Get(url)` helper, so the suffix-matched callee text needs
|
||||
// an explicit entry here — bare `Get/Post/Do/Head` would
|
||||
// an explicit entry here, bare `Get/Post/Do/Head` would
|
||||
// over-match unrelated method names.
|
||||
"http.DefaultClient.Get",
|
||||
"http.DefaultClient.Post",
|
||||
|
|
|
|||
|
|
@ -53,13 +53,13 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// OWASP ESAPI input validator — validates and canonicalizes input
|
||||
// OWASP ESAPI input validator, validates and canonicalizes input
|
||||
LabelRule {
|
||||
matchers: &["Validator.getValidInput"],
|
||||
label: DataLabel::Sanitizer(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Type-check sanitizers — parsing to a primitive erases taint
|
||||
// Type-check sanitizers, parsing to a primitive erases taint
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Integer.parseInt",
|
||||
|
|
@ -99,7 +99,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// HTTP response sinks — println/print are broad (also match System.out)
|
||||
// HTTP response sinks, println/print are broad (also match System.out)
|
||||
// but necessary to catch response.getWriter().println() via suffix matching.
|
||||
LabelRule {
|
||||
matchers: &["println", "print"],
|
||||
|
|
@ -107,7 +107,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
case_sensitive: false,
|
||||
},
|
||||
// openConnection() is the standard java.net.URL API for initiating a connection.
|
||||
// It is the correct interception point — the URL is already set on the object.
|
||||
// It is the correct interception point, the URL is already set on the object.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"openConnection",
|
||||
|
|
@ -153,9 +153,9 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// NOTE: Java logging (logger.info, log.warn, etc.) removed as sinks —
|
||||
// NOTE: Java logging (logger.info, log.warn, etc.) removed as sinks;
|
||||
// logging format injection is not a real security vulnerability in Java.
|
||||
// String.format also removed — it builds strings in memory (not a sink);
|
||||
// String.format also removed, it builds strings in memory (not a sink);
|
||||
// the real sink is wherever the formatted string is used (SQL, HTTP, etc.).
|
||||
// ─── JNDI injection sinks ───
|
||||
LabelRule {
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
case_sensitive: false,
|
||||
},
|
||||
// `encodeURIComponent` percent-encodes every character outside the
|
||||
// ASCII identifier alphabet, including `<`, `>`, `&`, `"`, `'` — so
|
||||
// ASCII identifier alphabet, including `<`, `>`, `&`, `"`, `'`, so
|
||||
// the result is safe to embed in HTML text content and HTML
|
||||
// attribute values, not just URL components. Treating it as
|
||||
// covering both URL_ENCODE and HTML_ESCAPE caps avoids FPs when a
|
||||
|
|
@ -92,7 +92,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// he library — HTML entity encoding
|
||||
// he library, HTML entity encoding
|
||||
LabelRule {
|
||||
matchers: &["he.encode", "he.escape"],
|
||||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
|
|
@ -148,16 +148,16 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ── Outbound HTTP clients — modeled as destination-aware gated sinks ──
|
||||
// ── Outbound HTTP clients, modeled as destination-aware gated sinks ──
|
||||
// Flat-Sink modeling of fetch/axios/got/undici/http.request was producing
|
||||
// a dominant FP class where any tainted body/payload arg appeared as SSRF
|
||||
// (e.g. `fetch("/api/telemetry", { body: navigator.userAgent })`). SSRF
|
||||
// semantics require attacker control over the *destination*, not the
|
||||
// payload. The gated entries in `GATED_SINKS` below narrow activation to
|
||||
// URL / host / path / origin arguments or object fields. Taint flowing
|
||||
// only to body / data / json / headers is no longer flagged as SSRF —
|
||||
// cross-boundary data-exfiltration detection is a separate future
|
||||
// capability (`Cap::DATA_EXFIL`, not yet introduced).
|
||||
// payload. The gated entries in `GATED_SINKS` below narrow SSRF
|
||||
// activation to URL / host / path / origin arguments or object fields.
|
||||
// Taint flowing only to body / data / json / headers is captured by a
|
||||
// *separate* gate class (`Cap::DATA_EXFIL`) so the two can coexist on
|
||||
// the same callee without one over-flagging the other.
|
||||
// Express response sinks
|
||||
LabelRule {
|
||||
matchers: &["res.send", "res.json"],
|
||||
|
|
@ -222,6 +222,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration (DATA_EXFIL) ─────────────────────
|
||||
//
|
||||
// `XMLHttpRequest.prototype.send(body)`: when the receiver type is
|
||||
// tracked back to `new XMLHttpRequest()`, the SSA engine's type-qualified
|
||||
// resolver converts `xhr.send` to `HttpClient.send`; matching that form
|
||||
// fires DATA_EXFIL on tainted body flow. The explicit
|
||||
// `XMLHttpRequest.prototype.send.apply(...)` form is also covered. The
|
||||
// `fetch` body / headers / json case is covered by the gated entry in
|
||||
// `GATED_SINKS` (so SSRF on the URL and DATA_EXFIL on the payload can
|
||||
// coexist on a single call site).
|
||||
LabelRule {
|
||||
matchers: &["HttpClient.send", "XMLHttpRequest.prototype.send"],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─────────── SQL injection sinks ─────────────
|
||||
// Database drivers: mysql, mysql2, pg, better-sqlite3
|
||||
LabelRule {
|
||||
|
|
@ -314,7 +329,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
// only to body / data / json / headers / payload does not activate SSRF
// (for `fetch`, it is reported by the separate `Cap::DATA_EXFIL` gate). See the
|
||||
// commentary at the top of RULES for the rationale.
|
||||
//
|
||||
// `fetch(input, init)` — arg 0 can be a URL string OR a Request/config
|
||||
// `fetch(input, init)`, arg 0 can be a URL string OR a Request/config
|
||||
// object with `url`. Per WHATWG Fetch, when `input` is a dictionary, the
|
||||
// URL field is canonically `url`. Init-object body/headers at arg 1 are
|
||||
// *not* destination-bearing.
|
||||
|
|
@ -332,7 +347,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["url"],
|
||||
},
|
||||
},
|
||||
// `axios(config)` / `axios.request(config)` — config object exposes
|
||||
// `axios(config)` / `axios.request(config)`, config object exposes
|
||||
// `url` and `baseURL`. Body-ish fields (`data`, `params`, `headers`)
|
||||
// are excluded.
|
||||
SinkGate {
|
||||
|
|
@ -363,7 +378,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["url", "baseURL"],
|
||||
},
|
||||
},
|
||||
// `axios.get(url[, config])` — arg 0 is URL; arg 1 is config.
|
||||
// `axios.get(url[, config])`, arg 0 is URL; arg 1 is config.
|
||||
SinkGate {
|
||||
callee_matcher: "axios.get",
|
||||
arg_index: 0,
|
||||
|
|
@ -378,7 +393,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `axios.post(url, data[, config])` — arg 0 is URL; `data` at arg 1 is
|
||||
// `axios.post(url, data[, config])`, arg 0 is URL; `data` at arg 1 is
|
||||
// the request body and must NOT activate SSRF.
|
||||
SinkGate {
|
||||
callee_matcher: "axios.post",
|
||||
|
|
@ -394,7 +409,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `axios.put / axios.patch / axios.delete` follow the same shape —
|
||||
// `axios.put / axios.patch / axios.delete` follow the same shape:
|
||||
// (url, data?, config?). Keep the model consistent across verbs.
|
||||
SinkGate {
|
||||
callee_matcher: "axios.put",
|
||||
|
|
@ -438,7 +453,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `got(url[, options])` / `got(options)` — options exposes `url` and
|
||||
// `got(url[, options])` / `got(options)`, options exposes `url` and
|
||||
// `prefixUrl`. Body-ish fields (`body`, `json`, `form`, `searchParams`,
|
||||
// `headers`) are excluded.
|
||||
SinkGate {
|
||||
|
|
@ -455,7 +470,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["url", "prefixUrl"],
|
||||
},
|
||||
},
|
||||
// `undici.request(url | opts[, opts])` — opts exposes `origin` and
|
||||
// `undici.request(url | opts[, opts])`, opts exposes `origin` and
|
||||
// `path`. Body-ish fields (`body`, `headers`) are excluded.
|
||||
SinkGate {
|
||||
callee_matcher: "undici.request",
|
||||
|
|
@ -471,11 +486,11 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["origin", "path"],
|
||||
},
|
||||
},
|
||||
// Node `http.request(options[, cb])` / `https.request(options[, cb])` —
|
||||
// Node `http.request(options[, cb])` / `https.request(options[, cb])`:
|
||||
// options exposes `host`, `hostname`, `path`, `protocol`, `port`,
|
||||
// `origin`. Body is sent via `.write()`/`.end()` on the returned
|
||||
// ClientRequest, so it never appears as a positional arg here.
|
||||
// Arg 0 may also be a URL string — the "whole arg is destination"
|
||||
// Arg 0 may also be a URL string, the "whole arg is destination"
|
||||
// fallback (triggered when arg 0 is not an object literal) covers that.
|
||||
SinkGate {
|
||||
callee_matcher: "http.request",
|
||||
|
|
@ -505,7 +520,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["host", "hostname", "path", "protocol", "port", "origin"],
|
||||
},
|
||||
},
|
||||
// Node `http.get(options[, cb])` / `https.get(options[, cb])` —
|
||||
// Node `http.get(options[, cb])` / `https.get(options[, cb])`:
|
||||
// convenience wrappers around `.request()` that auto-call `.end()`.
|
||||
// Same destination semantics as `.request`. Motivated by
|
||||
// CVE-2025-64430 (Parse Server SSRF via http.get(uri)).
|
||||
|
|
@ -537,6 +552,31 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["host", "hostname", "path", "protocol", "port", "origin"],
|
||||
},
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Sensitive data flowing into the *payload* of an outbound request is a
|
||||
// distinct vulnerability class from SSRF: the destination is fixed but
|
||||
// attacker-influenced bytes leave the process via the request body /
|
||||
// headers / json field. These gates fire on the body-bearing positions
|
||||
// and emit `Cap::DATA_EXFIL`, which is intentionally separate from
|
||||
// `Cap::SSRF` so a `fetch(taintedUrl, {body: tainted})` site reports
|
||||
// both classes independently.
|
||||
//
|
||||
// `fetch(input, init)`, `init` at arg 1 carries body / headers / json.
|
||||
SinkGate {
|
||||
callee_matcher: "fetch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["body", "headers", "json"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ pub struct LabelRule {
|
|||
/// expands it to `(0..arity)` using the actual call arity.
|
||||
///
|
||||
/// The value `usize::MAX` is used because `args.get(usize::MAX)` is a guaranteed
|
||||
/// miss for any real argument list — an accidental direct-lookup would be a no-op
|
||||
/// miss for any real argument list, an accidental direct-lookup would be a no-op
|
||||
/// rather than silently aliasing position 0.
|
||||
pub const ALL_ARGS_PAYLOAD: &[usize] = &[usize::MAX];
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ pub enum GateActivation {
|
|||
/// arg selects the MIME type).
|
||||
ValueMatch,
|
||||
/// Destination-bearing flow activation. The gate fires when taint reaches
|
||||
/// a declared destination location at the call site — no literal
|
||||
/// a declared destination location at the call site, no literal
|
||||
/// inspection, no prefix heuristic.
|
||||
///
|
||||
/// For callees whose destination is a positional argument (e.g. `fetch`'s
|
||||
|
|
@ -80,7 +80,7 @@ pub enum GateActivation {
|
|||
}
|
||||
|
||||
/// Argument-sensitive sink activation. Whether a call becomes a sink is
|
||||
/// determined by the gate's [`GateActivation`] mode — literal-value matching
|
||||
/// determined by the gate's [`GateActivation`] mode, literal-value matching
|
||||
/// for traditional role-selector APIs, or destination-flow activation for
|
||||
/// outbound HTTP clients and other APIs where a specific location in the
|
||||
/// call carries the attacker-controlled destination.
|
||||
|
|
@ -144,6 +144,13 @@ bitflags! {
|
|||
/// carrier cap for folding `auth_analysis` into the SSA/taint
|
||||
/// engine.
|
||||
const UNAUTHORIZED_ID = 0b0001_0000_0000_0000; // bit 12
|
||||
/// Cross-boundary data-exfiltration: tainted sensitive data flowing
|
||||
/// into outbound request bodies, headers, or other payload-bearing
|
||||
/// fields of network egress APIs. Distinct from `SSRF` (attacker
|
||||
/// control over the destination URL), `DATA_EXFIL` fires when the
|
||||
/// destination is fixed but attacker-influenced data leaves the
|
||||
/// process via the request payload.
|
||||
const DATA_EXFIL = 0b0010_0000_0000_0000; // bit 13
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -192,7 +199,7 @@ pub enum Kind {
|
|||
/// reachability does not depend on sibling-case execution order.
|
||||
Switch,
|
||||
Trivia,
|
||||
/// Simple sequential expression (e.g. cast/type-assertion) — treated like
|
||||
/// Simple sequential expression (e.g. cast/type-assertion), treated like
|
||||
/// any other sequential statement in the CFG but explicitly classified so
|
||||
/// code that inspects `Kind` can recognise it.
|
||||
Seq,
|
||||
|
|
@ -472,9 +479,9 @@ pub enum SourceKind {
|
|||
FileSystem,
|
||||
/// Database query results
|
||||
Database,
|
||||
/// Caught exception — may carry user-controlled data
|
||||
/// Caught exception, may carry user-controlled data
|
||||
CaughtException,
|
||||
/// Could not determine — treat conservatively
|
||||
/// Could not determine, treat conservatively
|
||||
Unknown,
|
||||
}
|
||||
|
||||
|
|
@ -511,7 +518,7 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
|
||||
// File system patterns
|
||||
if cl.contains("read") || cl.contains("fopen") || cl.contains("open") {
|
||||
// Distinguish from db reads — file reads typically have FILE_IO cap
|
||||
// Distinguish from db reads, file reads typically have FILE_IO cap
|
||||
if caps.contains(Cap::FILE_IO) {
|
||||
return SourceKind::FileSystem;
|
||||
}
|
||||
|
|
@ -570,6 +577,7 @@ pub fn parse_cap(s: &str) -> Option<Cap> {
|
|||
"code_exec" => Some(Cap::CODE_EXEC),
|
||||
"crypto" => Some(Cap::CRYPTO),
|
||||
"unauthorized_id" => Some(Cap::UNAUTHORIZED_ID),
|
||||
"data_exfil" | "data_exfiltration" => Some(Cap::DATA_EXFIL),
|
||||
"all" => Some(Cap::all()),
|
||||
_ => None,
|
||||
}
|
||||
|
|
@ -621,7 +629,7 @@ pub fn build_lang_rules(
|
|||
Vec::new()
|
||||
};
|
||||
|
||||
// Phase C: fold `auth_analysis` into the taint engine by injecting
|
||||
// Fold `auth_analysis` into the taint engine by injecting
|
||||
// `Cap::UNAUTHORIZED_ID` sink/sanitizer rules. Gated by config; default
|
||||
// OFF so the standalone `auth_analysis` subsystem remains authoritative.
|
||||
if config.scanner.enable_auth_as_taint {
|
||||
|
|
@ -636,7 +644,7 @@ pub fn build_lang_rules(
|
|||
}
|
||||
}
|
||||
|
||||
/// Return Phase C auth-as-taint rules for a given language (currently Rust-only).
|
||||
/// Return the auth-as-taint rules for a given language (Rust-only).
|
||||
fn phase_c_auth_rules_for_lang(lang_slug: &str) -> Vec<RuntimeLabelRule> {
|
||||
match lang_slug {
|
||||
"rust" | "rs" => rust::phase_c_auth_rules(),
|
||||
|
|
@ -718,7 +726,7 @@ fn match_suffix_cs(text: &[u8], matcher: &[u8], case_sensitive: bool) -> bool {
|
|||
if exact_only {
|
||||
// `=foo` matchers fire only when `text` IS `foo` (no `Mod.foo`,
|
||||
// `Class::foo`, or any preceding namespace). Lets a label rule
|
||||
// distinguish bare `Kernel#open` from `File.open` — the former
|
||||
// distinguish bare `Kernel#open` from `File.open`, the former
|
||||
// shells out on `|cmd`, the latter never does (CVE-2020-8130).
|
||||
start == 0
|
||||
} else {
|
||||
|
|
@ -731,7 +739,7 @@ fn match_suffix_cs(text: &[u8], matcher: &[u8], case_sensitive: bool) -> bool {
|
|||
|
||||
/// Strip an optional `=` "exact-match" sigil from the start of a matcher.
|
||||
/// Matchers prefixed with `=` (e.g. `"=open"`) only fire when the candidate
|
||||
/// text equals the matcher exactly — the boundary-`.`-or-`:` allowance is
|
||||
/// text equals the matcher exactly, the boundary-`.`-or-`:` allowance is
|
||||
/// suppressed. Used to distinguish bare-callee Ruby/Python builtins from
|
||||
/// methods of the same name on a typed receiver.
|
||||
#[inline]
|
||||
|
|
@ -767,7 +775,7 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
|
|||
let full_normalized = normalize_chained_call(text);
|
||||
let full_norm_bytes = full_normalized.as_bytes();
|
||||
|
||||
// ── Check runtime (config) rules first — they take priority ──────
|
||||
// ── Check runtime (config) rules first, they take priority ──────
|
||||
if let Some(extras) = extra {
|
||||
// Pass 1: exact / suffix
|
||||
for rule in extras {
|
||||
|
|
@ -865,7 +873,7 @@ pub fn classify_all(
|
|||
}
|
||||
}
|
||||
|
||||
// ── Check runtime (config) rules first — they take priority ──────
|
||||
// ── Check runtime (config) rules first, they take priority ──────
|
||||
if let Some(extras) = extra {
|
||||
// Pass 1: exact / suffix
|
||||
for rule in extras {
|
||||
|
|
@ -941,7 +949,7 @@ pub fn classify_all(
|
|||
/// (or [`ALL_ARGS_PAYLOAD`] for dynamic-activation conservative fallback).
|
||||
/// `object_destination_fields`, when non-empty, restricts sink-taint checks
|
||||
/// to identifiers found under those field names within an object-literal
|
||||
/// positional argument — used by destination-aware outbound-HTTP gates so
|
||||
/// positional argument, used by destination-aware outbound-HTTP gates so
|
||||
/// `fetch({url, body})` fires only when taint reaches `url`, not `body`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct GateMatch {
|
||||
|
|
@ -952,9 +960,13 @@ pub struct GateMatch {
|
|||
|
||||
/// Classify a call against gated sink rules.
|
||||
///
|
||||
/// Returns `Some(GateMatch)` if the callee matches a gated rule AND the
|
||||
/// activation conditions fire. Returns `None` if the callee doesn't match
|
||||
/// any gated rule, or matches but the activation is provably safe.
|
||||
/// Returns every gate whose callee matches AND whose activation conditions
|
||||
/// fire. An empty result means the callee did not match any gated rule, or
|
||||
/// every match was provably safe. Multiple matches are possible when the
|
||||
/// same callee carries gates for different sink classes, e.g. `fetch` is
|
||||
/// both an SSRF gate (URL flow) and a `DATA_EXFIL` gate (body / headers /
|
||||
/// json flow); each gate carries its own [`GateMatch`] so downstream code
|
||||
/// can attribute findings per-cap.
|
||||
///
|
||||
/// `const_arg_at` extracts positional argument values.
|
||||
/// `const_keyword_arg` extracts keyword argument values (for languages like Python).
|
||||
|
|
@ -964,11 +976,15 @@ pub fn classify_gated_sink(
|
|||
const_arg_at: impl Fn(usize) -> Option<String>,
|
||||
const_keyword_arg: impl Fn(&str) -> Option<String>,
|
||||
kwarg_present: impl Fn(&str) -> bool,
|
||||
) -> Option<GateMatch> {
|
||||
let gates = GATED_REGISTRY.get(lang).or_else(|| {
|
||||
) -> SmallVec<[GateMatch; 2]> {
|
||||
let mut out: SmallVec<[GateMatch; 2]> = SmallVec::new();
|
||||
let gates = match GATED_REGISTRY.get(lang).or_else(|| {
|
||||
let key = lang.to_ascii_lowercase();
|
||||
GATED_REGISTRY.get(key.as_str())
|
||||
})?;
|
||||
}) {
|
||||
Some(g) => g,
|
||||
None => return out,
|
||||
};
|
||||
|
||||
let callee_bytes = callee_text.as_bytes();
|
||||
|
||||
|
|
@ -985,11 +1001,12 @@ pub fn classify_gated_sink(
|
|||
object_destination_fields,
|
||||
} = gate.activation
|
||||
{
|
||||
return Some(GateMatch {
|
||||
out.push(GateMatch {
|
||||
label: gate.label,
|
||||
payload_args: gate.payload_args,
|
||||
object_destination_fields,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── ValueMatch activation (legacy) ───────────────────────────────
|
||||
|
|
@ -1012,7 +1029,7 @@ pub fn classify_gated_sink(
|
|||
any_dangerous = true;
|
||||
break;
|
||||
}
|
||||
// Present with a safe literal — continue checking other kwargs.
|
||||
// Present with a safe literal, continue checking other kwargs.
|
||||
}
|
||||
None => {
|
||||
any_dynamic_present = true;
|
||||
|
|
@ -1020,23 +1037,25 @@ pub fn classify_gated_sink(
|
|||
}
|
||||
}
|
||||
if any_dangerous {
|
||||
return Some(GateMatch {
|
||||
out.push(GateMatch {
|
||||
label: gate.label,
|
||||
payload_args: gate.payload_args,
|
||||
object_destination_fields: &[],
|
||||
});
|
||||
continue;
|
||||
}
|
||||
if any_dynamic_present {
|
||||
// Dynamic kwarg value — we can't prove safe. Conservatively
|
||||
// Dynamic kwarg value, we can't prove safe. Conservatively
|
||||
// flag every positional arg so the activation pathway isn't
|
||||
// silently narrowed to the gate's declared `payload_args`.
|
||||
return Some(GateMatch {
|
||||
out.push(GateMatch {
|
||||
label: gate.label,
|
||||
payload_args: ALL_ARGS_PAYLOAD,
|
||||
object_destination_fields: &[],
|
||||
});
|
||||
continue;
|
||||
}
|
||||
return None; // all listed kwargs absent or safe-literal → suppress
|
||||
continue; // all listed kwargs absent or safe-literal → suppress
|
||||
}
|
||||
|
||||
// Single-kwarg / positional gate path (original semantics).
|
||||
|
|
@ -1058,22 +1077,22 @@ pub fn classify_gated_sink(
|
|||
.iter()
|
||||
.any(|p| lower.starts_with(&p.to_ascii_lowercase()));
|
||||
if is_dangerous {
|
||||
return Some(GateMatch {
|
||||
out.push(GateMatch {
|
||||
label: gate.label,
|
||||
payload_args: gate.payload_args,
|
||||
object_destination_fields: &[],
|
||||
});
|
||||
}
|
||||
return None; // safe constant → suppress
|
||||
// safe constant → suppress (no push)
|
||||
}
|
||||
// Unknown / dynamic activation arg: the gate fires conservatively,
|
||||
// but we can't prove that only the declared `payload_args` carry
|
||||
// risk — a tainted activation arg (e.g. `setAttribute(userAttr, …)`
|
||||
// risk, a tainted activation arg (e.g. `setAttribute(userAttr, …)`
|
||||
// where `userAttr` is user-controlled) is itself a vulnerability
|
||||
// path. Return ALL_ARGS_PAYLOAD so downstream sink scanning
|
||||
// considers every positional argument.
|
||||
None => {
|
||||
return Some(GateMatch {
|
||||
out.push(GateMatch {
|
||||
label: gate.label,
|
||||
payload_args: ALL_ARGS_PAYLOAD,
|
||||
object_destination_fields: &[],
|
||||
|
|
@ -1081,7 +1100,7 @@ pub fn classify_gated_sink(
|
|||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
out
|
||||
}
|
||||
|
||||
/// Public wrapper for [`normalize_chained_call`] so callers outside the module
|
||||
|
|
@ -1090,25 +1109,11 @@ pub fn normalize_chained_call_for_classify(text: &str) -> String {
|
|||
normalize_chained_call(text)
|
||||
}
|
||||
|
||||
/// Return the bare method-name segment of a callee text.
|
||||
///
|
||||
/// Centralised replacement for the textual `callee.rsplit('.').next().unwrap_or(callee)`
|
||||
/// pattern that used to be scattered across the codebase.
|
||||
///
|
||||
/// Behaviour-preserving across the Phase 2 SSA chain decomposition rollout:
|
||||
/// - When SSA lowering rewrites a chained-receiver call (`c.mu.Lock()` →
|
||||
/// `Call("Lock", [v_mu])`), the call's `callee` is already the bare method
|
||||
/// name, so this helper is a no-op pass-through.
|
||||
/// - For 1-dot callees (`obj.method`) and for languages where Phase 2 lowering
|
||||
/// doesn't run yet (PHP/Ruby) the helper still extracts the trailing method
|
||||
/// from the textual form, exactly as the old per-callsite split did.
|
||||
/// - For bare callees (no dot), it returns the input unchanged.
|
||||
///
|
||||
/// Use this helper when you need the *terminal* method name from a callee
|
||||
/// string regardless of whether the call had a chained receiver. When you
|
||||
/// have an `SsaOp::Call` in hand, prefer reading `callee` directly and
|
||||
/// walking `receiver` through `FieldProj` ops — that's the precise path.
|
||||
/// This helper is the textual fallback for callsites that only see a `&str`.
|
||||
/// Return the bare method-name segment of a callee text. Returns the
|
||||
/// input unchanged for bare callees. When you have an `SsaOp::Call`,
|
||||
/// prefer reading `callee` directly and walking `receiver` through
|
||||
/// `FieldProj` ops, this helper is the textual fallback for callsites
|
||||
/// that only see a `&str`.
|
||||
pub fn bare_method_name(callee: &str) -> &str {
|
||||
callee.rsplit('.').next().unwrap_or(callee)
|
||||
}
|
||||
|
|
@ -1314,19 +1319,15 @@ mod tests {
|
|||
fn bare_method_name_strips_chain() {
|
||||
// No-dot input → returned as-is.
|
||||
assert_eq!(bare_method_name("foo"), "foo");
|
||||
// 1-dot → trailing segment (Phase 2 leaves these alone in SSA).
|
||||
// 1-dot → trailing segment.
|
||||
assert_eq!(bare_method_name("obj.method"), "method");
|
||||
// Multi-dot → trailing segment (matches AST-only callees from
|
||||
// PHP/Ruby and any pre-Phase-2 textual paths kept around in
|
||||
// `callee_text` for display).
|
||||
// Multi-dot → trailing segment.
|
||||
assert_eq!(bare_method_name("a.b.c.method"), "method");
|
||||
// Trailing dot → empty trailing segment, matching the legacy
|
||||
// `rsplit('.').next()` behaviour bit-for-bit.
|
||||
// Trailing dot → empty trailing segment.
|
||||
assert_eq!(bare_method_name("foo."), "");
|
||||
// Empty input.
|
||||
assert_eq!(bare_method_name(""), "");
|
||||
// Phase 2 invariant: when SSA decomposed a chain, `callee` is
|
||||
// the bare method already and the helper is a no-op.
|
||||
// SSA-decomposed chains pass through untouched.
|
||||
assert_eq!(bare_method_name("Lock"), "Lock");
|
||||
}
|
||||
|
||||
|
|
@ -1399,7 +1400,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn classify_bare_href_is_none() {
|
||||
// Bare "href" should NOT be a sink — only "location.href" and variants
|
||||
// Bare "href" should NOT be a sink, only "location.href" and variants
|
||||
let result = classify("javascript", "href", None);
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
|
|
@ -1497,7 +1498,7 @@ mod tests {
|
|||
#[test]
|
||||
fn classify_go_user_client_get_is_not_ssrf_sink() {
|
||||
// `client.Get` on a user-named *http.Client variable should NOT
|
||||
// match — the Go SSRF set is restricted to the stdlib package
|
||||
// match, the Go SSRF set is restricted to the stdlib package
|
||||
// helper `http.DefaultClient`. Type-aware resolution would be the
|
||||
// path to a broader rule, not a bare-name match.
|
||||
let result = classify("go", "client.Get", None);
|
||||
|
|
@ -1530,7 +1531,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn classify_ruby_io_open_is_not_shell_escape_sink() {
|
||||
// `IO.open` takes a file descriptor — never pipes. The bare-
|
||||
// `IO.open` takes a file descriptor, never pipes. The bare-
|
||||
// open CMDI rule must leave it alone.
|
||||
let result = classify("ruby", "IO.open", None);
|
||||
assert_ne!(result, Some(DataLabel::Sink(Cap::SHELL_ESCAPE)));
|
||||
|
|
@ -1572,7 +1573,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn classify_cpp_sto_family_is_sanitizer() {
|
||||
// Phase 1: full `std::sto*` family (including 64-bit and `long
|
||||
// The full `std::sto*` family (including 64-bit and `long
|
||||
// double` variants) clears every taint cap that flows through it,
|
||||
// matching the existing `std::stoi`/`std::stol` rule.
|
||||
for callee in [
|
||||
|
|
@ -1621,6 +1622,16 @@ mod tests {
|
|||
false
|
||||
}
|
||||
|
||||
/// Find the first matching gate whose label sink-caps overlap `caps`.
|
||||
/// Lets tests target a specific gate when a callee carries multiple
|
||||
/// (e.g. `fetch` is both an SSRF and a `DATA_EXFIL` gate).
|
||||
fn find_match_with_caps(matches: &[GateMatch], caps: Cap) -> Option<GateMatch> {
|
||||
matches
|
||||
.iter()
|
||||
.find(|m| matches!(m.label, DataLabel::Sink(c) if c.intersects(caps)))
|
||||
.copied()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gated_sink_dangerous_exact() {
|
||||
let result = classify_gated_sink(
|
||||
|
|
@ -1631,12 +1642,12 @@ mod tests {
|
|||
no_kw_present,
|
||||
);
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(GateMatch {
|
||||
result.as_slice(),
|
||||
&[GateMatch {
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
payload_args: [1usize].as_slice(),
|
||||
object_destination_fields: &[],
|
||||
})
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1650,12 +1661,12 @@ mod tests {
|
|||
no_kw_present,
|
||||
);
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(GateMatch {
|
||||
result.as_slice(),
|
||||
&[GateMatch {
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
payload_args: [1usize].as_slice(),
|
||||
object_destination_fields: &[],
|
||||
})
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1668,24 +1679,24 @@ mod tests {
|
|||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert_eq!(result, None);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gated_sink_dynamic_conservative() {
|
||||
// Dynamic activation (e.g. `setAttribute(attrVar, val)`) returns the
|
||||
// ALL_ARGS_PAYLOAD sentinel so callers expand payload tracking to
|
||||
// every positional arg — the activation arg itself is a vulnerability
|
||||
// every positional arg; the activation arg itself is a vulnerability
|
||||
// path when attacker-controlled.
|
||||
let result =
|
||||
classify_gated_sink("javascript", "setAttribute", |_| None, no_kw, no_kw_present);
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(GateMatch {
|
||||
result.as_slice(),
|
||||
&[GateMatch {
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
payload_args: ALL_ARGS_PAYLOAD,
|
||||
object_destination_fields: &[],
|
||||
})
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1698,7 +1709,7 @@ mod tests {
|
|||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert_eq!(result, None);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1711,7 +1722,7 @@ mod tests {
|
|||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert_eq!(result.unwrap().payload_args, &[1]);
|
||||
assert_eq!(result[0].payload_args, &[1]);
|
||||
|
||||
// parseFromString: payload is arg 0
|
||||
let result = classify_gated_sink(
|
||||
|
|
@ -1727,7 +1738,7 @@ mod tests {
|
|||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert_eq!(result.unwrap().payload_args, &[0]);
|
||||
assert_eq!(result[0].payload_args, &[0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1745,7 +1756,7 @@ mod tests {
|
|||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert_eq!(result, None);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1764,12 +1775,12 @@ mod tests {
|
|||
|kw| kw == "shell",
|
||||
);
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(GateMatch {
|
||||
result.as_slice(),
|
||||
&[GateMatch {
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
payload_args: [0usize].as_slice(),
|
||||
object_destination_fields: &[],
|
||||
})
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1788,7 +1799,7 @@ mod tests {
|
|||
},
|
||||
|kw| kw == "shell",
|
||||
);
|
||||
assert_eq!(result, None);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1797,12 +1808,12 @@ mod tests {
|
|||
// literal available → unknown activation → ALL_ARGS_PAYLOAD sentinel.
|
||||
let result = classify_gated_sink("python", "Popen", |_| None, |_| None, no_kw_present);
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(GateMatch {
|
||||
result.as_slice(),
|
||||
&[GateMatch {
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
payload_args: ALL_ARGS_PAYLOAD,
|
||||
object_destination_fields: &[],
|
||||
})
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1825,12 +1836,12 @@ mod tests {
|
|||
|kw| kw == "shell",
|
||||
);
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(GateMatch {
|
||||
result.as_slice(),
|
||||
&[GateMatch {
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
payload_args: [0usize].as_slice(),
|
||||
object_destination_fields: &[],
|
||||
})
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1850,7 +1861,7 @@ mod tests {
|
|||
},
|
||||
|kw| kw == "shell",
|
||||
);
|
||||
assert_eq!(result, None);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
/// `subprocess.run(cmd)` → no shell kwarg → presence-aware gate suppresses.
|
||||
|
|
@ -1864,7 +1875,7 @@ mod tests {
|
|||
|_| None,
|
||||
no_kw_present,
|
||||
);
|
||||
assert_eq!(result, None);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
/// `subprocess.run(cmd, shell=flag)` → shell kwarg present but dynamic →
|
||||
|
|
@ -1880,12 +1891,12 @@ mod tests {
|
|||
|kw| kw == "shell",
|
||||
);
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(GateMatch {
|
||||
result.as_slice(),
|
||||
&[GateMatch {
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
payload_args: ALL_ARGS_PAYLOAD,
|
||||
object_destination_fields: &[],
|
||||
})
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1893,18 +1904,18 @@ mod tests {
|
|||
/// verbatim for the caller to apply object-literal field filtering.
|
||||
#[test]
|
||||
fn gated_sink_destination_positional_always_fires() {
|
||||
// `fetch(url)` — arg 0 is the URL (positional destination) OR an
|
||||
// `fetch(url)`, arg 0 is the URL (positional destination) OR an
|
||||
// object with a `url` field. The gate fires unconditionally, with
|
||||
// `url` declared as the object-literal destination-field for the
|
||||
// `fetch({url, body})` shape.
|
||||
let result = classify_gated_sink(
|
||||
"javascript",
|
||||
"fetch",
|
||||
|_| None, // no literal — Destination mode doesn't inspect it
|
||||
|_| None, // no literal, Destination mode doesn't inspect it
|
||||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
let m = result.expect("fetch gate should fire");
|
||||
let m = find_match_with_caps(&result, Cap::SSRF).expect("fetch SSRF gate should fire");
|
||||
assert_eq!(m.label, DataLabel::Sink(Cap::SSRF));
|
||||
assert_eq!(m.payload_args, &[0]);
|
||||
assert_eq!(m.object_destination_fields, &["url"]);
|
||||
|
|
@ -1914,10 +1925,13 @@ mod tests {
|
|||
/// the CFG caller to drive object-literal field filtering.
|
||||
#[test]
|
||||
fn gated_sink_destination_object_fields_surfaced() {
|
||||
// `http.request(opts, cb)` — opts is an object with destination fields.
|
||||
// `http.request(opts, cb)`, opts is an object with destination fields.
|
||||
let result =
|
||||
classify_gated_sink("javascript", "http.request", |_| None, no_kw, no_kw_present);
|
||||
let m = result.expect("http.request gate should fire");
|
||||
let m = result
|
||||
.first()
|
||||
.copied()
|
||||
.expect("http.request gate should fire");
|
||||
assert_eq!(m.label, DataLabel::Sink(Cap::SSRF));
|
||||
assert_eq!(m.payload_args, &[0]);
|
||||
assert!(
|
||||
|
|
@ -1929,6 +1943,27 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// A single classify call for `fetch` must surface two gates: SSRF for
/// the URL flow and `DATA_EXFIL` for the body / headers / json flow.
/// Both are needed so the downstream CFG can build per-cap filters.
#[test]
fn gated_sink_fetch_emits_ssrf_and_data_exfil() {
    let gates = classify_gated_sink("javascript", "fetch", |_| None, no_kw, no_kw_present);

    // URL gate: payload is arg 0, with `url` as the object-literal field.
    let url_gate = find_match_with_caps(&gates, Cap::SSRF).expect("SSRF gate fires");
    assert_eq!(url_gate.label, DataLabel::Sink(Cap::SSRF));
    assert_eq!(url_gate.payload_args, &[0]);
    assert_eq!(url_gate.object_destination_fields, &["url"]);

    // Payload gate: payload is arg 1 and `body` must be a destination field.
    let payload_gate =
        find_match_with_caps(&gates, Cap::DATA_EXFIL).expect("DATA_EXFIL gate fires");
    assert_eq!(payload_gate.label, DataLabel::Sink(Cap::DATA_EXFIL));
    assert_eq!(payload_gate.payload_args, &[1]);
    let destination_fields = payload_gate.object_destination_fields;
    assert!(
        destination_fields.iter().any(|field| *field == "body"),
        "expected body in DATA_EXFIL destination fields, got {:?}",
        destination_fields,
    );
}
|
||||
|
||||
#[test]
|
||||
fn classify_all_single_label() {
|
||||
let result = classify_all("javascript", "innerHTML", None);
|
||||
|
|
|
|||
|
|
@ -106,6 +106,19 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// SQLAlchemy bound-parameter sanitizer. Values passed as keyword
|
||||
// arguments to `text("…:name…").bindparams(name=value)` are bound
|
||||
// by the driver, so injection cannot break out of the literal
|
||||
// context. The accompanying SQL-string check (py.sqli.text_format)
|
||||
// already flags the `text(f"…")` shape at construction, so this
|
||||
// sanitizer only clears flow when the SQL is a literal and the
|
||||
// values reach the engine via bindparams. Recognises both the
|
||||
// method form (`text(…).bindparams(...)`) and the bare call form.
|
||||
LabelRule {
|
||||
matchers: &["bindparams", ".bindparams"],
|
||||
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Path canonicalization
|
||||
LabelRule {
|
||||
matchers: &["os.path.abspath", "os.path.normpath"],
|
||||
|
|
@ -119,7 +132,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Jinja2 / string.Template — tainted template string enables SSTI
|
||||
// Jinja2 / string.Template, tainted template string enables SSTI
|
||||
LabelRule {
|
||||
matchers: &["Template"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
|
|
@ -141,7 +154,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Flask Markup — bypasses auto-escaping
|
||||
// Flask Markup, bypasses auto-escaping
|
||||
LabelRule {
|
||||
matchers: &["Markup"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
|
|
@ -216,7 +229,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// aiohttp HTTP client — SSRF sinks
|
||||
// aiohttp HTTP client, SSRF sinks
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"aiohttp.get",
|
||||
|
|
@ -228,6 +241,30 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Type-qualified SSRF sinks: when the receiver is tracked as
|
||||
// TypeKind::HttpClient (e.g. `client = requests.Session()`,
|
||||
// `client = httpx.Client()`, or `s = aiohttp.ClientSession()`),
|
||||
// resolve_type_qualified_labels() constructs `"HttpClient.<method>"`
|
||||
// call texts so the receiver-name is no longer load-bearing. Matches
|
||||
// the existing Rust HttpClient.<method> sink set so both languages
|
||||
// stay in step on the type-aware SSRF model. Motivated by the
|
||||
// upstream LMDeploy CVE-2026-33626 shape:
|
||||
// client = requests.Session()
|
||||
// response = client.get(url, ...)
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"HttpClient.get",
|
||||
"HttpClient.post",
|
||||
"HttpClient.put",
|
||||
"HttpClient.delete",
|
||||
"HttpClient.patch",
|
||||
"HttpClient.head",
|
||||
"HttpClient.request",
|
||||
"HttpClient.send",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"pickle.loads",
|
||||
|
|
@ -256,7 +293,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// subprocess.run(cmd, shell=True) — multi-kwarg gate using the new
|
||||
// subprocess.run(cmd, shell=True), multi-kwarg gate using the new
|
||||
// presence-aware mechanism. Payload is arg 1 (after receiver offset
|
||||
// applied by the CFG layer when the call is modelled method-style).
|
||||
SinkGate {
|
||||
|
|
@ -361,7 +398,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
|
|||
let mut rules = Vec::new();
|
||||
|
||||
if ctx.has(DetectedFramework::Django) {
|
||||
// QuerySet.extra() — raw SQL injection risk.
|
||||
// QuerySet.extra(), raw SQL injection risk.
|
||||
// Framework-conditional because `extra` is too generic as a static matcher.
|
||||
rules.push(RuntimeLabelRule {
|
||||
matchers: vec!["extra".into()],
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Rails request object — user-controlled HTTP request data.
|
||||
// Rails request object, user-controlled HTTP request data.
|
||||
// Dotted matchers work via push_node receiver.method text construction
|
||||
// (confirmed by existing Net::HTTP.get matcher in ssrf_net_http fixture).
|
||||
LabelRule {
|
||||
|
|
@ -75,7 +75,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
// Bare `Kernel#open(path)` interprets a path beginning with `|` as a
|
||||
// shell command (`open("|cmd")` runs `cmd`). `=open` exact-matcher
|
||||
// syntax limits this rule to the bare call — `File.open`, `IO.open`,
|
||||
// syntax limits this rule to the bare call, `File.open`, `IO.open`,
|
||||
// `URI.open` etc. each have their own non-pipe semantics and are
|
||||
// covered by their own labels (or intentionally not labeled as CMDI).
|
||||
// CVE-2020-8130 (rake `Rake::FileList#egrep`) was the canonical
|
||||
|
|
@ -99,7 +99,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
// File I/O sinks: user-controlled paths flowing into File.open/File.new
|
||||
// are a path-traversal / arbitrary-read vector. File.open also participates
|
||||
// in the resource-lifecycle acquire/release pair (cfg_analysis::RUBY_RESOURCES),
|
||||
// so this entry is additive — it does not disturb resource-leak detection.
|
||||
// so this entry is additive; it does not disturb resource-leak detection.
|
||||
LabelRule {
|
||||
matchers: &["File.open", "File.new", "File.read", "IO.read"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
|
|
@ -115,7 +115,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// URI.open is the network-capable Kernel#open wrapper — more specific than
|
||||
// URI.open is the network-capable Kernel#open wrapper, more specific than
|
||||
// plain `open` (excluded to avoid file I/O false positives).
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
|
|
@ -140,7 +140,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::DESERIALIZE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Reflection / dynamic class resolution — arbitrary class instantiation from
|
||||
// Reflection / dynamic class resolution, arbitrary class instantiation from
|
||||
// user-controlled names enables gadget chains (similar risk profile to
|
||||
// deserialization). Rails adds `constantize`/`safe_constantize` to String.
|
||||
LabelRule {
|
||||
|
|
@ -157,7 +157,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
// SQL injection: ActiveRecord query methods that accept raw SQL strings.
|
||||
// `where` and `order` are the most common Rails SQLi vectors when called
|
||||
// with string interpolation (e.g., User.where("name = '#{params[:name]}'")).
|
||||
// Broad matchers — verified against fixture fallout.
|
||||
// Broad matchers, verified against fixture fallout.
|
||||
LabelRule {
|
||||
matchers: &["where", "order", "group", "having", "joins", "pluck"],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
|
|
@ -240,7 +240,7 @@ pub static PARAM_CONFIG: ParamConfig = ParamConfig {
|
|||
|
||||
/// ActiveRecord query methods that the static [`RULES`] table classifies as
|
||||
/// `Sink(Cap::SQL_QUERY)`. These are SQL injection vectors only when arg 0
|
||||
/// is a string with interpolation (`#{x}`) or a non-literal identifier — the
|
||||
/// is a string with interpolation (`#{x}`) or a non-literal identifier; the
|
||||
/// hash form (`where(id: x)`) and the parameterised form (`where("a = ?", x)`)
|
||||
/// are intrinsically safe because Rails escapes the values.
|
||||
const AR_QUERY_METHOD_NAMES: &[&str] = &["where", "order", "group", "having", "joins", "pluck"];
|
||||
|
|
@ -249,7 +249,7 @@ const AR_QUERY_METHOD_NAMES: &[&str] = &["where", "order", "group", "having", "j
|
|||
/// shape-safe. Hash literals (`pair`, `hash`), symbol literals
|
||||
/// (`simple_symbol`, `hash_key_symbol`), array literals (`array`), and pure
|
||||
/// string literals without `#{...}` interpolation are all safe. Strings WITH
|
||||
/// interpolation and identifiers / method calls are *not* in this list —
|
||||
/// interpolation and identifiers / method calls are *not* in this list;
|
||||
/// callers must check `has_interpolation` and the kind separately.
|
||||
const AR_QUERY_SAFE_ARG0_KINDS: &[&str] = &[
|
||||
"pair",
|
||||
|
|
@ -270,15 +270,15 @@ const AR_QUERY_SAFE_ARG0_KINDS: &[&str] = &[
|
|||
/// `cfg-unguarded-sink` (sanitiser dominates the sink reflexively).
|
||||
///
|
||||
/// Real-world FP shapes this closes (redmine, mastodon, diaspora):
|
||||
/// * `Issue.where(:id => params[:id])` — hash form
|
||||
/// * `Model.where(id: x, name: y)` — keyword-shorthand pairs
|
||||
/// * `Project.order(:created_at)` — symbol literal
|
||||
/// * `Issue.pluck(:id, :name)` — symbol literals
|
||||
/// * `Model.where("active = ?", x)` — parameterised string
|
||||
/// * `Issue.where(:id => params[:id])`, hash form
|
||||
/// * `Model.where(id: x, name: y)`, keyword-shorthand pairs
|
||||
/// * `Project.order(:created_at)`, symbol literal
|
||||
/// * `Issue.pluck(:id, :name)`, symbol literals
|
||||
/// * `Model.where("active = ?", x)`, parameterised string
|
||||
///
|
||||
/// Real-world TPs preserved:
|
||||
/// * `User.where("name = '#{name}'")` — string with interpolation
|
||||
/// * `Model.where(some_string_var)` — dynamic identifier (conservative)
|
||||
/// * `User.where("name = '#{name}'")`, string with interpolation
|
||||
/// * `Model.where(some_string_var)`, dynamic identifier (conservative)
|
||||
pub fn ar_query_safe_shape(callee_text: &str, arg0_kind: &str, has_interpolation: bool) -> bool {
|
||||
// Match the callee's last segment ("Model.where" → "where", "where" → "where").
|
||||
let leaf = callee_text.rsplit(['.', ':']).next().unwrap_or(callee_text);
|
||||
|
|
@ -297,7 +297,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
|
|||
let mut rules = Vec::new();
|
||||
|
||||
if ctx.has(DetectedFramework::Rails) {
|
||||
// Strong parameters — permit/require sanitize user input
|
||||
// Strong parameters, permit/require sanitize user input
|
||||
rules.push(RuntimeLabelRule {
|
||||
matchers: vec!["permit".into(), "require".into()],
|
||||
label: DataLabel::Sanitizer(Cap::all()),
|
||||
|
|
@ -306,7 +306,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
|
|||
}
|
||||
|
||||
if ctx.has(DetectedFramework::Sinatra) {
|
||||
// Sinatra template rendering — user content flows to rendered output
|
||||
// Sinatra template rendering, user content flows to rendered output
|
||||
rules.push(RuntimeLabelRule {
|
||||
matchers: vec!["erb".into(), "haml".into()],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
|
|
@ -323,7 +323,7 @@ mod ar_query_tests {
|
|||
|
||||
#[test]
|
||||
fn hash_form_is_safe() {
|
||||
// Model.where(:id => x) — pair node directly in argument_list
|
||||
// Model.where(:id => x): pair node directly in argument_list
|
||||
assert!(ar_query_safe_shape("Model.where", "pair", false));
|
||||
// Model.where(id: x)
|
||||
assert!(ar_query_safe_shape("where", "pair", false));
|
||||
|
|
@ -338,32 +338,32 @@ mod ar_query_tests {
|
|||
|
||||
#[test]
|
||||
fn parameterised_string_is_safe() {
|
||||
// Model.where("a = ?", x) — first arg is a string literal w/o interpolation
|
||||
// Model.where("a = ?", x): first arg is a string literal w/o interpolation
|
||||
assert!(ar_query_safe_shape("where", "string", false));
|
||||
assert!(ar_query_safe_shape("where", "string_literal", false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn interpolated_string_is_dangerous() {
|
||||
// Model.where("a = #{x}") — string node WITH interpolation child
|
||||
// Model.where("a = #{x}"): string node WITH interpolation child
|
||||
assert!(!ar_query_safe_shape("where", "string", true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dynamic_identifier_is_dangerous() {
|
||||
// Model.where(some_var) — kind is identifier, not in safe list
|
||||
// Model.where(some_var), kind is identifier, not in safe list
|
||||
assert!(!ar_query_safe_shape("where", "identifier", false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn array_form_is_safe() {
|
||||
// Model.pluck([:id, :name]) — uncommon but valid
|
||||
// Model.pluck([:id, :name]), uncommon but valid
|
||||
assert!(ar_query_safe_shape("pluck", "array", false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_ar_method_is_never_suppressed() {
|
||||
// find_by_sql is a real raw-SQL sink — never suppress.
|
||||
// find_by_sql is a real raw-SQL sink, never suppress.
|
||||
assert!(!ar_query_safe_shape("find_by_sql", "string", false));
|
||||
assert!(!ar_query_safe_shape("connection.execute", "pair", false));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"expression_statement" => Kind::CallWrapper,
|
||||
"assignment_expression" => Kind::Assignment,
|
||||
|
||||
// struct expressions — recurse so env::var() calls inside field
|
||||
// struct expressions, recurse so env::var() calls inside field
|
||||
// initialisers produce Source-labelled CFG nodes (needed for summaries).
|
||||
"struct_expression" => Kind::Block,
|
||||
"field_initializer_list" => Kind::Block,
|
||||
|
|
@ -287,7 +287,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
|
|||
rules
|
||||
}
|
||||
|
||||
/// Phase C: auth-as-taint label rules for Rust. Gated by
|
||||
/// auth-as-taint label rules for Rust. Gated by
|
||||
/// `config.scanner.enable_auth_as_taint`; appended to the runtime rule set
|
||||
/// when the flag is enabled. These declare **sinks** (state-changing or
|
||||
/// outbound operations that should not be reached by an un-checked
|
||||
|
|
@ -343,10 +343,8 @@ pub fn phase_c_auth_rules() -> Vec<RuntimeLabelRule> {
|
|||
case_sensitive: false,
|
||||
},
|
||||
// ── Sanitizers clearing Cap::UNAUTHORIZED_ID ──
|
||||
// Ownership and membership guards from the auth_analysis default
|
||||
// `authorization_check_names` list. Phase C consumes these via
|
||||
// call-site argument sanitization (see
|
||||
// `is_auth_as_taint_arg_sanitizer` in ssa_transfer).
|
||||
// Ownership and membership guards consumed via call-site
|
||||
// argument sanitization (see `is_auth_as_taint_arg_sanitizer`).
|
||||
RuntimeLabelRule {
|
||||
matchers: vec![
|
||||
"check_ownership".into(),
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// he library — HTML entity encoding
|
||||
// he library, HTML entity encoding
|
||||
LabelRule {
|
||||
matchers: &["he.encode", "he.escape"],
|
||||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
|
|
@ -131,7 +131,7 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ── Outbound HTTP clients — modeled as destination-aware gated sinks ──
|
||||
// ── Outbound HTTP clients, modeled as destination-aware gated sinks ──
|
||||
// See GATED_SINKS below; rationale mirrors javascript.rs.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
|
|
@ -206,6 +206,14 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration (DATA_EXFIL) ─────────────────────
|
||||
// See javascript.rs for rationale. `xhr.send(body)` resolves to
|
||||
// `HttpClient.send` via type-qualified resolution.
|
||||
LabelRule {
|
||||
matchers: &["HttpClient.send", "XMLHttpRequest.prototype.send"],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─────────── SQL injection sinks ─────────────
|
||||
// Database drivers: mysql, mysql2, pg, better-sqlite3
|
||||
LabelRule {
|
||||
|
|
@ -283,7 +291,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// ── Outbound HTTP clients (SSRF) — see javascript.rs for rationale ────
|
||||
// ── Outbound HTTP clients (SSRF), see javascript.rs for rationale ────
|
||||
SinkGate {
|
||||
callee_matcher: "fetch",
|
||||
arg_index: 0,
|
||||
|
|
@ -452,6 +460,24 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["host", "hostname", "path", "protocol", "port", "origin"],
|
||||
},
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
// `fetch(input, init)`, payload-bearing fields of `init` (arg 1) flow
|
||||
// into the request body / headers / json, distinct from SSRF on the URL
|
||||
// (arg 0). See javascript.rs for full rationale.
|
||||
SinkGate {
|
||||
callee_matcher: "fetch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["body", "headers", "json"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue