Python FP and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -115,8 +115,8 @@ pub static PARAM_CONFIG: ParamConfig = ParamConfig {
/// Benchmark-driven output-parameter source positions for known C APIs.
/// Maps callee name → argument positions that receive Source taint.
pub static OUTPUT_PARAM_SOURCES: &[(&str, &[usize])] = &[
("fgets", &[0]), // fgets(buf, size, stream) buf receives input
("gets", &[0]), // gets(buf) buf receives input
("fgets", &[0]), // fgets(buf, size, stream), buf receives input
("gets", &[0]), // gets(buf), buf receives input
("recv", &[1]), // recv(fd, buf, len, flags)
("recvfrom", &[1]), // recvfrom(fd, buf, len, flags, ...)
];

View file

@ -120,7 +120,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
// and extract them as separate bodies. Without these, a
// `class_specifier` / `struct_specifier` falls through to the
// generic `_ =>` arm in `build_sub`, which records a leaf `Seq`
// node and never walks the body so inline member-function
// node and never walks the body, so inline member-function
// definitions (and methods of nested classes) are silently dropped.
"declaration_list" => Kind::Block,
"field_declaration_list" => Kind::Block,
@ -160,7 +160,7 @@ pub static PARAM_CONFIG: ParamConfig = ParamConfig {
/// Benchmark-driven output-parameter source positions for known C++ APIs.
pub static OUTPUT_PARAM_SOURCES: &[(&str, &[usize])] = &[
("getline", &[1]), // std::getline(stream, str) str receives input
("getline", &[1]), // std::getline(stream, str), str receives input
("std::getline", &[1]),
("fgets", &[0]),
("gets", &[0]),

View file

@ -66,7 +66,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// fmt.Printf/Sprintf write to stdout or build strings in memory not
// fmt.Printf/Sprintf write to stdout or build strings in memory, not
// security sinks. fmt.Fprintf writes to an io.Writer (often http.ResponseWriter)
// so it IS a security sink for XSS.
LabelRule {
@ -110,7 +110,7 @@ pub static RULES: &[LabelRule] = &[
// Idiomatic Go SSRF sinks (Owncast CVE-2023-3188) use the
// `http.DefaultClient.Get(url)` form rather than the bare
// `http.Get(url)` helper, so the suffix-matched callee text needs
// an explicit entry here bare `Get/Post/Do/Head` would
// an explicit entry here, bare `Get/Post/Do/Head` would
// over-match unrelated method names.
"http.DefaultClient.Get",
"http.DefaultClient.Post",

View file

@ -53,13 +53,13 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
case_sensitive: false,
},
// OWASP ESAPI input validator validates and canonicalizes input
// OWASP ESAPI input validator, validates and canonicalizes input
LabelRule {
matchers: &["Validator.getValidInput"],
label: DataLabel::Sanitizer(Cap::all()),
case_sensitive: false,
},
// Type-check sanitizers parsing to a primitive erases taint
// Type-check sanitizers, parsing to a primitive erases taint
LabelRule {
matchers: &[
"Integer.parseInt",
@ -99,7 +99,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::CODE_EXEC),
case_sensitive: false,
},
// HTTP response sinks println/print are broad (also match System.out)
// HTTP response sinks, println/print are broad (also match System.out)
// but necessary to catch response.getWriter().println() via suffix matching.
LabelRule {
matchers: &["println", "print"],
@ -107,7 +107,7 @@ pub static RULES: &[LabelRule] = &[
case_sensitive: false,
},
// openConnection() is the standard java.net.URL API for initiating a connection.
// It is the correct interception point the URL is already set on the object.
// It is the correct interception point, the URL is already set on the object.
LabelRule {
matchers: &[
"openConnection",
@ -153,9 +153,9 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
},
// NOTE: Java logging (logger.info, log.warn, etc.) removed as sinks
// NOTE: Java logging (logger.info, log.warn, etc.) removed as sinks,
// logging format injection is not a real security vulnerability in Java.
// String.format also removed it builds strings in memory (not a sink);
// String.format also removed, it builds strings in memory (not a sink);
// the real sink is wherever the formatted string is used (SQL, HTTP, etc.).
// ─── JNDI injection sinks ───
LabelRule {

View file

@ -36,7 +36,7 @@ pub static RULES: &[LabelRule] = &[
case_sensitive: false,
},
// `encodeURIComponent` percent-encodes every character outside the
// ASCII identifier alphabet, including `<`, `>`, `&`, `"`, `'` so
// ASCII identifier alphabet, including `<`, `>`, `&`, `"`, `'`, so
// the result is safe to embed in HTML text content and HTML
// attribute values, not just URL components. Treating it as
// covering both URL_ENCODE and HTML_ESCAPE caps avoids FPs when a
@ -92,7 +92,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
case_sensitive: false,
},
// he library HTML entity encoding
// he library, HTML entity encoding
LabelRule {
matchers: &["he.encode", "he.escape"],
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
@ -148,16 +148,16 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: true,
},
// ── Outbound HTTP clients modeled as destination-aware gated sinks ──
// ── Outbound HTTP clients, modeled as destination-aware gated sinks ──
// Flat-Sink modeling of fetch/axios/got/undici/http.request was producing
// a dominant FP class where any tainted body/payload arg appeared as SSRF
// (e.g. `fetch("/api/telemetry", { body: navigator.userAgent })`). SSRF
// semantics require attacker control over the *destination*, not the
// payload. The gated entries in `GATED_SINKS` below narrow activation to
// URL / host / path / origin arguments or object fields. Taint flowing
// only to body / data / json / headers is no longer flagged as SSRF —
// cross-boundary data-exfiltration detection is a separate future
// capability (`Cap::DATA_EXFIL`, not yet introduced).
// payload. The gated entries in `GATED_SINKS` below narrow SSRF
// activation to URL / host / path / origin arguments or object fields.
// Taint flowing only to body / data / json / headers is captured by a
// *separate* gate class (`Cap::DATA_EXFIL`) so the two can coexist on
// the same callee without one over-flagging the other.
// Express response sinks
LabelRule {
matchers: &["res.send", "res.json"],
@ -222,6 +222,21 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// ── Cross-boundary data exfiltration (DATA_EXFIL) ─────────────────────
//
// `XMLHttpRequest.prototype.send(body)`, when the receiver type is
// tracked back to `new XMLHttpRequest()`, the SSA engine's type-qualified
// resolver converts `xhr.send` to `HttpClient.send`; matching that form
// fires DATA_EXFIL on tainted body flow. The explicit
// `XMLHttpRequest.prototype.send.apply(...)` form is also covered. The
// `fetch` body / headers / json case is covered by the gated entry in
// `GATED_SINKS` (so SSRF on the URL and DATA_EXFIL on the payload can
// coexist on a single call site).
LabelRule {
matchers: &["HttpClient.send", "XMLHttpRequest.prototype.send"],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: false,
},
// ─────────── SQL injection sinks ─────────────
// Database drivers: mysql, mysql2, pg, better-sqlite3
LabelRule {
@ -314,7 +329,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
// only to body / data / json / headers / payload is silenced. See the
// commentary at the top of RULES for the rationale.
//
// `fetch(input, init)` arg 0 can be a URL string OR a Request/config
// `fetch(input, init)`, arg 0 can be a URL string OR a Request/config
// object with `url`. Per WHATWG Fetch, when `input` is a dictionary, the
// URL field is canonically `url`. Init-object body/headers at arg 1 are
// *not* destination-bearing.
@ -332,7 +347,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["url"],
},
},
// `axios(config)` / `axios.request(config)` config object exposes
// `axios(config)` / `axios.request(config)`, config object exposes
// `url` and `baseURL`. Body-ish fields (`data`, `params`, `headers`)
// are excluded.
SinkGate {
@ -363,7 +378,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["url", "baseURL"],
},
},
// `axios.get(url[, config])` arg 0 is URL; arg 1 is config.
// `axios.get(url[, config])`, arg 0 is URL; arg 1 is config.
SinkGate {
callee_matcher: "axios.get",
arg_index: 0,
@ -378,7 +393,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &[],
},
},
// `axios.post(url, data[, config])` arg 0 is URL; `data` at arg 1 is
// `axios.post(url, data[, config])`, arg 0 is URL; `data` at arg 1 is
// the request body and must NOT activate SSRF.
SinkGate {
callee_matcher: "axios.post",
@ -394,7 +409,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &[],
},
},
// `axios.put / axios.patch / axios.delete` follow the same shape
// `axios.put / axios.patch / axios.delete` follow the same shape:
// (url, data?, config?). Keep the model consistent across verbs.
SinkGate {
callee_matcher: "axios.put",
@ -438,7 +453,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &[],
},
},
// `got(url[, options])` / `got(options)` options exposes `url` and
// `got(url[, options])` / `got(options)`, options exposes `url` and
// `prefixUrl`. Body-ish fields (`body`, `json`, `form`, `searchParams`,
// `headers`) are excluded.
SinkGate {
@ -455,7 +470,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["url", "prefixUrl"],
},
},
// `undici.request(url | opts[, opts])` opts exposes `origin` and
// `undici.request(url | opts[, opts])`, opts exposes `origin` and
// `path`. Body-ish fields (`body`, `headers`) are excluded.
SinkGate {
callee_matcher: "undici.request",
@ -471,11 +486,11 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["origin", "path"],
},
},
// Node `http.request(options[, cb])` / `https.request(options[, cb])`
// Node `http.request(options[, cb])` / `https.request(options[, cb])`:
// options exposes `host`, `hostname`, `path`, `protocol`, `port`,
// `origin`. Body is sent via `.write()`/`.end()` on the returned
// ClientRequest, so it never appears as a positional arg here.
// Arg 0 may also be a URL string the "whole arg is destination"
// Arg 0 may also be a URL string, the "whole arg is destination"
// fallback (triggered when arg 0 is not an object literal) covers that.
SinkGate {
callee_matcher: "http.request",
@ -505,7 +520,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["host", "hostname", "path", "protocol", "port", "origin"],
},
},
// Node `http.get(options[, cb])` / `https.get(options[, cb])`
// Node `http.get(options[, cb])` / `https.get(options[, cb])`:
// convenience wrappers around `.request()` that auto-call `.end()`.
// Same destination semantics as `.request`. Motivated by
// CVE-2025-64430 (Parse Server SSRF via http.get(uri)).
@ -537,6 +552,31 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["host", "hostname", "path", "protocol", "port", "origin"],
},
},
// ── Cross-boundary data exfiltration ──────────────────────────────────
//
// Sensitive data flowing into the *payload* of an outbound request is a
// distinct vulnerability class from SSRF: the destination is fixed but
// attacker-influenced bytes leave the process via the request body /
// headers / json field. These gates fire on the body-bearing positions
// and emit `Cap::DATA_EXFIL`, which is intentionally separate from
// `Cap::SSRF` so a `fetch(taintedUrl, {body: tainted})` site reports
// both classes independently.
//
// `fetch(input, init)`, `init` at arg 1 carries body / headers / json.
SinkGate {
callee_matcher: "fetch",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["body", "headers", "json"],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {

View file

@ -32,7 +32,7 @@ pub struct LabelRule {
/// expands it to `(0..arity)` using the actual call arity.
///
/// The value `usize::MAX` is used because `args.get(usize::MAX)` is a guaranteed
/// miss for any real argument list an accidental direct-lookup would be a no-op
/// miss for any real argument list, an accidental direct-lookup would be a no-op
/// rather than silently aliasing position 0.
pub const ALL_ARGS_PAYLOAD: &[usize] = &[usize::MAX];
@ -54,7 +54,7 @@ pub enum GateActivation {
/// arg selects the MIME type).
ValueMatch,
/// Destination-bearing flow activation. The gate fires when taint reaches
/// a declared destination location at the call site no literal
/// a declared destination location at the call site, no literal
/// inspection, no prefix heuristic.
///
/// For callees whose destination is a positional argument (e.g. `fetch`'s
@ -80,7 +80,7 @@ pub enum GateActivation {
}
/// Argument-sensitive sink activation. Whether a call becomes a sink is
/// determined by the gate's [`GateActivation`] mode literal-value matching
/// determined by the gate's [`GateActivation`] mode, literal-value matching
/// for traditional role-selector APIs, or destination-flow activation for
/// outbound HTTP clients and other APIs where a specific location in the
/// call carries the attacker-controlled destination.
@ -144,6 +144,13 @@ bitflags! {
/// carrier cap for folding `auth_analysis` into the SSA/taint
/// engine.
const UNAUTHORIZED_ID = 0b0001_0000_0000_0000; // bit 12
/// Cross-boundary data-exfiltration: tainted sensitive data flowing
/// into outbound request bodies, headers, or other payload-bearing
/// fields of network egress APIs. Distinct from `SSRF` (attacker
/// control over the destination URL), `DATA_EXFIL` fires when the
/// destination is fixed but attacker-influenced data leaves the
/// process via the request payload.
const DATA_EXFIL = 0b0010_0000_0000_0000; // bit 13
}
}
@ -192,7 +199,7 @@ pub enum Kind {
/// reachability does not depend on sibling-case execution order.
Switch,
Trivia,
/// Simple sequential expression (e.g. cast/type-assertion) treated like
/// Simple sequential expression (e.g. cast/type-assertion), treated like
/// any other sequential statement in the CFG but explicitly classified so
/// code that inspects `Kind` can recognise it.
Seq,
@ -472,9 +479,9 @@ pub enum SourceKind {
FileSystem,
/// Database query results
Database,
/// Caught exception may carry user-controlled data
/// Caught exception, may carry user-controlled data
CaughtException,
/// Could not determine treat conservatively
/// Could not determine, treat conservatively
Unknown,
}
@ -511,7 +518,7 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
// File system patterns
if cl.contains("read") || cl.contains("fopen") || cl.contains("open") {
// Distinguish from db reads file reads typically have FILE_IO cap
// Distinguish from db reads, file reads typically have FILE_IO cap
if caps.contains(Cap::FILE_IO) {
return SourceKind::FileSystem;
}
@ -570,6 +577,7 @@ pub fn parse_cap(s: &str) -> Option<Cap> {
"code_exec" => Some(Cap::CODE_EXEC),
"crypto" => Some(Cap::CRYPTO),
"unauthorized_id" => Some(Cap::UNAUTHORIZED_ID),
"data_exfil" | "data_exfiltration" => Some(Cap::DATA_EXFIL),
"all" => Some(Cap::all()),
_ => None,
}
@ -621,7 +629,7 @@ pub fn build_lang_rules(
Vec::new()
};
// Phase C: fold `auth_analysis` into the taint engine by injecting
// Fold `auth_analysis` into the taint engine by injecting
// `Cap::UNAUTHORIZED_ID` sink/sanitizer rules. Gated by config; default
// OFF so the standalone `auth_analysis` subsystem remains authoritative.
if config.scanner.enable_auth_as_taint {
@ -636,7 +644,7 @@ pub fn build_lang_rules(
}
}
/// Return Phase C auth-as-taint rules for a given language (currently Rust-only).
/// Return the auth-as-taint rules for a given language (Rust-only).
fn phase_c_auth_rules_for_lang(lang_slug: &str) -> Vec<RuntimeLabelRule> {
match lang_slug {
"rust" | "rs" => rust::phase_c_auth_rules(),
@ -718,7 +726,7 @@ fn match_suffix_cs(text: &[u8], matcher: &[u8], case_sensitive: bool) -> bool {
if exact_only {
// `=foo` matchers fire only when `text` IS `foo` (no `Mod.foo`,
// `Class::foo`, or any preceding namespace). Lets a label rule
// distinguish bare `Kernel#open` from `File.open` the former
// distinguish bare `Kernel#open` from `File.open`, the former
// shells out on `|cmd`, the latter never does (CVE-2020-8130).
start == 0
} else {
@ -731,7 +739,7 @@ fn match_suffix_cs(text: &[u8], matcher: &[u8], case_sensitive: bool) -> bool {
/// Strip an optional `=` "exact-match" sigil from the start of a matcher.
/// Matchers prefixed with `=` (e.g. `"=open"`) only fire when the candidate
/// text equals the matcher exactly the boundary-`.`-or-`:` allowance is
/// text equals the matcher exactly, the boundary-`.`-or-`:` allowance is
/// suppressed. Used to distinguish bare-callee Ruby/Python builtins from
/// methods of the same name on a typed receiver.
#[inline]
@ -767,7 +775,7 @@ pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> O
let full_normalized = normalize_chained_call(text);
let full_norm_bytes = full_normalized.as_bytes();
// ── Check runtime (config) rules first they take priority ──────
// ── Check runtime (config) rules first, they take priority ──────
if let Some(extras) = extra {
// Pass 1: exact / suffix
for rule in extras {
@ -865,7 +873,7 @@ pub fn classify_all(
}
}
// ── Check runtime (config) rules first they take priority ──────
// ── Check runtime (config) rules first, they take priority ──────
if let Some(extras) = extra {
// Pass 1: exact / suffix
for rule in extras {
@ -941,7 +949,7 @@ pub fn classify_all(
/// (or [`ALL_ARGS_PAYLOAD`] for dynamic-activation conservative fallback).
/// `object_destination_fields`, when non-empty, restricts sink-taint checks
/// to identifiers found under those field names within an object-literal
/// positional argument used by destination-aware outbound-HTTP gates so
/// positional argument, used by destination-aware outbound-HTTP gates so
/// `fetch({url, body})` fires only when taint reaches `url`, not `body`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GateMatch {
@ -952,9 +960,13 @@ pub struct GateMatch {
/// Classify a call against gated sink rules.
///
/// Returns `Some(GateMatch)` if the callee matches a gated rule AND the
/// activation conditions fire. Returns `None` if the callee doesn't match
/// any gated rule, or matches but the activation is provably safe.
/// Returns every gate whose callee matches AND whose activation conditions
/// fire. An empty result means the callee did not match any gated rule, or
/// every match was provably safe. Multiple matches are possible when the
/// same callee carries gates for different sink classes, e.g. `fetch` is
/// both an SSRF gate (URL flow) and a `DATA_EXFIL` gate (body / headers /
/// json flow); each gate carries its own [`GateMatch`] so downstream code
/// can attribute findings per-cap.
///
/// `const_arg_at` extracts positional argument values.
/// `const_keyword_arg` extracts keyword argument values (for languages like Python).
@ -964,11 +976,15 @@ pub fn classify_gated_sink(
const_arg_at: impl Fn(usize) -> Option<String>,
const_keyword_arg: impl Fn(&str) -> Option<String>,
kwarg_present: impl Fn(&str) -> bool,
) -> Option<GateMatch> {
let gates = GATED_REGISTRY.get(lang).or_else(|| {
) -> SmallVec<[GateMatch; 2]> {
let mut out: SmallVec<[GateMatch; 2]> = SmallVec::new();
let gates = match GATED_REGISTRY.get(lang).or_else(|| {
let key = lang.to_ascii_lowercase();
GATED_REGISTRY.get(key.as_str())
})?;
}) {
Some(g) => g,
None => return out,
};
let callee_bytes = callee_text.as_bytes();
@ -985,11 +1001,12 @@ pub fn classify_gated_sink(
object_destination_fields,
} = gate.activation
{
return Some(GateMatch {
out.push(GateMatch {
label: gate.label,
payload_args: gate.payload_args,
object_destination_fields,
});
continue;
}
// ── ValueMatch activation (legacy) ───────────────────────────────
@ -1012,7 +1029,7 @@ pub fn classify_gated_sink(
any_dangerous = true;
break;
}
// Present with a safe literal continue checking other kwargs.
// Present with a safe literal, continue checking other kwargs.
}
None => {
any_dynamic_present = true;
@ -1020,23 +1037,25 @@ pub fn classify_gated_sink(
}
}
if any_dangerous {
return Some(GateMatch {
out.push(GateMatch {
label: gate.label,
payload_args: gate.payload_args,
object_destination_fields: &[],
});
continue;
}
if any_dynamic_present {
// Dynamic kwarg value we can't prove safe. Conservatively
// Dynamic kwarg value, we can't prove safe. Conservatively
// flag every positional arg so the activation pathway isn't
// silently narrowed to the gate's declared `payload_args`.
return Some(GateMatch {
out.push(GateMatch {
label: gate.label,
payload_args: ALL_ARGS_PAYLOAD,
object_destination_fields: &[],
});
continue;
}
return None; // all listed kwargs absent or safe-literal → suppress
continue; // all listed kwargs absent or safe-literal → suppress
}
// Single-kwarg / positional gate path (original semantics).
@ -1058,22 +1077,22 @@ pub fn classify_gated_sink(
.iter()
.any(|p| lower.starts_with(&p.to_ascii_lowercase()));
if is_dangerous {
return Some(GateMatch {
out.push(GateMatch {
label: gate.label,
payload_args: gate.payload_args,
object_destination_fields: &[],
});
}
return None; // safe constant → suppress
// safe constant → suppress (no push)
}
// Unknown / dynamic activation arg: the gate fires conservatively,
// but we can't prove that only the declared `payload_args` carry
// risk a tainted activation arg (e.g. `setAttribute(userAttr, …)`
// risk, a tainted activation arg (e.g. `setAttribute(userAttr, …)`
// where `userAttr` is user-controlled) is itself a vulnerability
// path. Return ALL_ARGS_PAYLOAD so downstream sink scanning
// considers every positional argument.
None => {
return Some(GateMatch {
out.push(GateMatch {
label: gate.label,
payload_args: ALL_ARGS_PAYLOAD,
object_destination_fields: &[],
@ -1081,7 +1100,7 @@ pub fn classify_gated_sink(
}
}
}
None
out
}
/// Public wrapper for [`normalize_chained_call`] so callers outside the module
@ -1090,25 +1109,11 @@ pub fn normalize_chained_call_for_classify(text: &str) -> String {
normalize_chained_call(text)
}
/// Return the bare method-name segment of a callee text.
///
/// Centralised replacement for the textual `callee.rsplit('.').next().unwrap_or(callee)`
/// pattern that used to be scattered across the codebase.
///
/// Behaviour-preserving across the Phase 2 SSA chain decomposition rollout:
/// - When SSA lowering rewrites a chained-receiver call (`c.mu.Lock()` →
/// `Call("Lock", [v_mu])`), the call's `callee` is already the bare method
/// name, so this helper is a no-op pass-through.
/// - For 1-dot callees (`obj.method`) and for languages where Phase 2 lowering
/// doesn't run yet (PHP/Ruby) the helper still extracts the trailing method
/// from the textual form, exactly as the old per-callsite split did.
/// - For bare callees (no dot), it returns the input unchanged.
///
/// Use this helper when you need the *terminal* method name from a callee
/// string regardless of whether the call had a chained receiver. When you
/// have an `SsaOp::Call` in hand, prefer reading `callee` directly and
/// walking `receiver` through `FieldProj` ops — that's the precise path.
/// This helper is the textual fallback for callsites that only see a `&str`.
/// Extract the terminal method-name segment from a callee string.
///
/// Everything after the last `.` is returned. A callee containing no `.`
/// comes back unchanged, and a callee ending in `.` yields an empty
/// segment. This is the textual fallback for callsites that only hold a
/// `&str`; when an `SsaOp::Call` is available, prefer reading its `callee`
/// directly and walking `receiver` through `FieldProj` ops.
pub fn bare_method_name(callee: &str) -> &str {
    match callee.rfind('.') {
        // `.` is a single byte, so `last_dot + 1` is always a valid
        // char boundary (and at most `callee.len()`).
        Some(last_dot) => &callee[last_dot + 1..],
        None => callee,
    }
}
@ -1314,19 +1319,15 @@ mod tests {
fn bare_method_name_strips_chain() {
// No-dot input → returned as-is.
assert_eq!(bare_method_name("foo"), "foo");
// 1-dot → trailing segment (Phase 2 leaves these alone in SSA).
// 1-dot → trailing segment.
assert_eq!(bare_method_name("obj.method"), "method");
// Multi-dot → trailing segment (matches AST-only callees from
// PHP/Ruby and any pre-Phase-2 textual paths kept around in
// `callee_text` for display).
// Multi-dot → trailing segment.
assert_eq!(bare_method_name("a.b.c.method"), "method");
// Trailing dot → empty trailing segment, matching the legacy
// `rsplit('.').next()` behaviour bit-for-bit.
// Trailing dot → empty trailing segment.
assert_eq!(bare_method_name("foo."), "");
// Empty input.
assert_eq!(bare_method_name(""), "");
// Phase 2 invariant: when SSA decomposed a chain, `callee` is
// the bare method already and the helper is a no-op.
// SSA-decomposed chains pass through untouched.
assert_eq!(bare_method_name("Lock"), "Lock");
}
@ -1399,7 +1400,7 @@ mod tests {
#[test]
fn classify_bare_href_is_none() {
// Bare "href" should NOT be a sink only "location.href" and variants
// Bare "href" should NOT be a sink, only "location.href" and variants
let result = classify("javascript", "href", None);
assert_eq!(result, None);
}
@ -1497,7 +1498,7 @@ mod tests {
#[test]
fn classify_go_user_client_get_is_not_ssrf_sink() {
// `client.Get` on a user-named *http.Client variable should NOT
// match the Go SSRF set is restricted to the stdlib package
// match, the Go SSRF set is restricted to the stdlib package
// helper `http.DefaultClient`. Type-aware resolution would be the
// path to a broader rule, not a bare-name match.
let result = classify("go", "client.Get", None);
@ -1530,7 +1531,7 @@ mod tests {
#[test]
fn classify_ruby_io_open_is_not_shell_escape_sink() {
// `IO.open` takes a file descriptor never pipes. The bare-
// `IO.open` takes a file descriptor, never pipes. The bare-
// open CMDI rule must leave it alone.
let result = classify("ruby", "IO.open", None);
assert_ne!(result, Some(DataLabel::Sink(Cap::SHELL_ESCAPE)));
@ -1572,7 +1573,7 @@ mod tests {
#[test]
fn classify_cpp_sto_family_is_sanitizer() {
// Phase 1: full `std::sto*` family (including 64-bit and `long
// full `std::sto*` family (including 64-bit and `long
// double` variants) clears every taint cap that flows through it,
// matching the existing `std::stoi`/`std::stol` rule.
for callee in [
@ -1621,6 +1622,16 @@ mod tests {
false
}
/// Return the first entry of `matches` whose label is a `DataLabel::Sink`
/// with a capability set that intersects `caps`, or `None` when no entry
/// qualifies. Lets a test pick out one specific gate when a callee carries
/// several (e.g. `fetch` is both an SSRF and a `DATA_EXFIL` gate).
fn find_match_with_caps(matches: &[GateMatch], caps: Cap) -> Option<GateMatch> {
    for candidate in matches {
        // Non-sink labels never qualify; only sink caps are compared.
        let is_hit = match candidate.label {
            DataLabel::Sink(sink_caps) => sink_caps.intersects(caps),
            _ => false,
        };
        if is_hit {
            return Some(*candidate);
        }
    }
    None
}
#[test]
fn gated_sink_dangerous_exact() {
let result = classify_gated_sink(
@ -1631,12 +1642,12 @@ mod tests {
no_kw_present,
);
assert_eq!(
result,
Some(GateMatch {
result.as_slice(),
&[GateMatch {
label: DataLabel::Sink(Cap::HTML_ESCAPE),
payload_args: [1usize].as_slice(),
object_destination_fields: &[],
})
}]
);
}
@ -1650,12 +1661,12 @@ mod tests {
no_kw_present,
);
assert_eq!(
result,
Some(GateMatch {
result.as_slice(),
&[GateMatch {
label: DataLabel::Sink(Cap::HTML_ESCAPE),
payload_args: [1usize].as_slice(),
object_destination_fields: &[],
})
}]
);
}
@ -1668,24 +1679,24 @@ mod tests {
no_kw,
no_kw_present,
);
assert_eq!(result, None);
assert!(result.is_empty());
}
#[test]
fn gated_sink_dynamic_conservative() {
// Dynamic activation (e.g. `setAttribute(attrVar, val)`) returns the
// ALL_ARGS_PAYLOAD sentinel so callers expand payload tracking to
// every positional arg the activation arg itself is a vulnerability
// every positional arg, the activation arg itself is a vulnerability
// path when attacker-controlled.
let result =
classify_gated_sink("javascript", "setAttribute", |_| None, no_kw, no_kw_present);
assert_eq!(
result,
Some(GateMatch {
result.as_slice(),
&[GateMatch {
label: DataLabel::Sink(Cap::HTML_ESCAPE),
payload_args: ALL_ARGS_PAYLOAD,
object_destination_fields: &[],
})
}]
);
}
@ -1698,7 +1709,7 @@ mod tests {
no_kw,
no_kw_present,
);
assert_eq!(result, None);
assert!(result.is_empty());
}
#[test]
@ -1711,7 +1722,7 @@ mod tests {
no_kw,
no_kw_present,
);
assert_eq!(result.unwrap().payload_args, &[1]);
assert_eq!(result[0].payload_args, &[1]);
// parseFromString: payload is arg 0
let result = classify_gated_sink(
@ -1727,7 +1738,7 @@ mod tests {
no_kw,
no_kw_present,
);
assert_eq!(result.unwrap().payload_args, &[0]);
assert_eq!(result[0].payload_args, &[0]);
}
#[test]
@ -1745,7 +1756,7 @@ mod tests {
no_kw,
no_kw_present,
);
assert_eq!(result, None);
assert!(result.is_empty());
}
#[test]
@ -1764,12 +1775,12 @@ mod tests {
|kw| kw == "shell",
);
assert_eq!(
result,
Some(GateMatch {
result.as_slice(),
&[GateMatch {
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
payload_args: [0usize].as_slice(),
object_destination_fields: &[],
})
}]
);
}
@ -1788,7 +1799,7 @@ mod tests {
},
|kw| kw == "shell",
);
assert_eq!(result, None);
assert!(result.is_empty());
}
#[test]
@ -1797,12 +1808,12 @@ mod tests {
// literal available → unknown activation → ALL_ARGS_PAYLOAD sentinel.
let result = classify_gated_sink("python", "Popen", |_| None, |_| None, no_kw_present);
assert_eq!(
result,
Some(GateMatch {
result.as_slice(),
&[GateMatch {
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
payload_args: ALL_ARGS_PAYLOAD,
object_destination_fields: &[],
})
}]
);
}
@ -1825,12 +1836,12 @@ mod tests {
|kw| kw == "shell",
);
assert_eq!(
result,
Some(GateMatch {
result.as_slice(),
&[GateMatch {
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
payload_args: [0usize].as_slice(),
object_destination_fields: &[],
})
}]
);
}
@ -1850,7 +1861,7 @@ mod tests {
},
|kw| kw == "shell",
);
assert_eq!(result, None);
assert!(result.is_empty());
}
/// `subprocess.run(cmd)` → no shell kwarg → presence-aware gate suppresses.
@ -1864,7 +1875,7 @@ mod tests {
|_| None,
no_kw_present,
);
assert_eq!(result, None);
assert!(result.is_empty());
}
/// `subprocess.run(cmd, shell=flag)` → shell kwarg present but dynamic →
@ -1880,12 +1891,12 @@ mod tests {
|kw| kw == "shell",
);
assert_eq!(
result,
Some(GateMatch {
result.as_slice(),
&[GateMatch {
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
payload_args: ALL_ARGS_PAYLOAD,
object_destination_fields: &[],
})
}]
);
}
@ -1893,18 +1904,18 @@ mod tests {
/// verbatim for the caller to apply object-literal field filtering.
#[test]
fn gated_sink_destination_positional_always_fires() {
// `fetch(url)` arg 0 is the URL (positional destination) OR an
// `fetch(url)`, arg 0 is the URL (positional destination) OR an
// object with a `url` field. The gate fires unconditionally, with
// `url` declared as the object-literal destination-field for the
// `fetch({url, body})` shape.
let result = classify_gated_sink(
"javascript",
"fetch",
|_| None, // no literal Destination mode doesn't inspect it
|_| None, // no literal, Destination mode doesn't inspect it
no_kw,
no_kw_present,
);
let m = result.expect("fetch gate should fire");
let m = find_match_with_caps(&result, Cap::SSRF).expect("fetch SSRF gate should fire");
assert_eq!(m.label, DataLabel::Sink(Cap::SSRF));
assert_eq!(m.payload_args, &[0]);
assert_eq!(m.object_destination_fields, &["url"]);
@ -1914,10 +1925,13 @@ mod tests {
/// the CFG caller to drive object-literal field filtering.
#[test]
fn gated_sink_destination_object_fields_surfaced() {
// `http.request(opts, cb)` opts is an object with destination fields.
// `http.request(opts, cb)`, opts is an object with destination fields.
let result =
classify_gated_sink("javascript", "http.request", |_| None, no_kw, no_kw_present);
let m = result.expect("http.request gate should fire");
let m = result
.first()
.copied()
.expect("http.request gate should fire");
assert_eq!(m.label, DataLabel::Sink(Cap::SSRF));
assert_eq!(m.payload_args, &[0]);
assert!(
@ -1929,6 +1943,27 @@ mod tests {
);
}
/// One `classify_gated_sink` call for `fetch` has to report two gates at
/// once: SSRF for the URL flow and `DATA_EXFIL` for the body / headers /
/// json flow, so the downstream CFG can build per-cap filters. Each gate is
/// looked up by capability and checked for its payload argument and its
/// object-literal destination fields.
#[test]
fn gated_sink_fetch_emits_ssrf_and_data_exfil() {
    let matches = classify_gated_sink("javascript", "fetch", |_| None, no_kw, no_kw_present);

    // URL flow: payload is arg 0, `url` is the object-literal field.
    let url_gate = find_match_with_caps(&matches, Cap::SSRF).expect("SSRF gate fires");
    assert_eq!(url_gate.label, DataLabel::Sink(Cap::SSRF));
    assert_eq!(url_gate.payload_args, &[0]);
    assert_eq!(url_gate.object_destination_fields, &["url"]);

    // Request-init flow: payload is arg 1 and `body` must be listed.
    let body_gate =
        find_match_with_caps(&matches, Cap::DATA_EXFIL).expect("DATA_EXFIL gate fires");
    assert_eq!(body_gate.label, DataLabel::Sink(Cap::DATA_EXFIL));
    assert_eq!(body_gate.payload_args, &[1]);
    let exfil_fields = body_gate.object_destination_fields;
    assert!(
        exfil_fields.contains(&"body"),
        "expected body in DATA_EXFIL destination fields, got {:?}",
        exfil_fields,
    );
}
#[test]
fn classify_all_single_label() {
let result = classify_all("javascript", "innerHTML", None);

View file

@ -106,6 +106,19 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::URL_ENCODE),
case_sensitive: false,
},
// SQLAlchemy bound-parameter sanitizer. Values passed as keyword
// arguments to `text("…:name…").bindparams(name=value)` are bound
// by the driver, so injection cannot break out of the literal
// context. The accompanying SQL-string check (py.sqli.text_format)
// already flags the `text(f"…")` shape at construction, so this
// sanitizer only clears flow when the SQL is a literal and the
// values reach the engine via bindparams. Recognises both the
// method form (`text(…).bindparams(...)`) and the bare call form.
LabelRule {
matchers: &["bindparams", ".bindparams"],
label: DataLabel::Sanitizer(Cap::SQL_QUERY),
case_sensitive: false,
},
// Path canonicalization
LabelRule {
matchers: &["os.path.abspath", "os.path.normpath"],
@ -119,7 +132,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::CODE_EXEC),
case_sensitive: false,
},
// Jinja2 / string.Template tainted template string enables SSTI
// Jinja2 / string.Template, tainted template string enables SSTI
LabelRule {
matchers: &["Template"],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
@ -141,7 +154,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
// Flask Markup bypasses auto-escaping
// Flask Markup, bypasses auto-escaping
LabelRule {
matchers: &["Markup"],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
@ -216,7 +229,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// aiohttp HTTP client SSRF sinks
// aiohttp HTTP client, SSRF sinks
LabelRule {
matchers: &[
"aiohttp.get",
@ -228,6 +241,30 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// Type-qualified SSRF sinks: when the receiver is tracked as
// TypeKind::HttpClient (e.g. `client = requests.Session()`,
// `client = httpx.Client()`, or `s = aiohttp.ClientSession()`),
// resolve_type_qualified_labels() constructs `"HttpClient.<method>"`
// call texts so the receiver-name is no longer load-bearing. Matches
// the existing Rust HttpClient.<method> sink set so both languages
// stay in step on the type-aware SSRF model. Motivated by the
// upstream LMDeploy CVE-2026-33626 shape:
// client = requests.Session()
// response = client.get(url, ...)
LabelRule {
matchers: &[
"HttpClient.get",
"HttpClient.post",
"HttpClient.put",
"HttpClient.delete",
"HttpClient.patch",
"HttpClient.head",
"HttpClient.request",
"HttpClient.send",
],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
LabelRule {
matchers: &[
"pickle.loads",
@ -256,7 +293,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// subprocess.run(cmd, shell=True) multi-kwarg gate using the new
// subprocess.run(cmd, shell=True), multi-kwarg gate using the new
// presence-aware mechanism. Payload is arg 1 (after receiver offset
// applied by the CFG layer when the call is modelled method-style).
SinkGate {
@ -361,7 +398,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
let mut rules = Vec::new();
if ctx.has(DetectedFramework::Django) {
// QuerySet.extra() raw SQL injection risk.
// QuerySet.extra(), raw SQL injection risk.
// Framework-conditional because `extra` is too generic as a static matcher.
rules.push(RuntimeLabelRule {
matchers: vec!["extra".into()],

View file

@ -14,7 +14,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Source(Cap::all()),
case_sensitive: false,
},
// Rails request object user-controlled HTTP request data.
// Rails request object, user-controlled HTTP request data.
// Dotted matchers work via push_node receiver.method text construction
// (confirmed by existing Net::HTTP.get matcher in ssrf_net_http fixture).
LabelRule {
@ -75,7 +75,7 @@ pub static RULES: &[LabelRule] = &[
},
// Bare `Kernel#open(path)` interprets a path beginning with `|` as a
// shell command (`open("|cmd")` runs `cmd`). `=open` exact-matcher
// syntax limits this rule to the bare call `File.open`, `IO.open`,
// syntax limits this rule to the bare call; `File.open`, `IO.open`,
// `URI.open` etc. each have their own non-pipe semantics and are
// covered by their own labels (or intentionally not labeled as CMDI).
// CVE-2020-8130 (rake `Rake::FileList#egrep`) was the canonical
@ -99,7 +99,7 @@ pub static RULES: &[LabelRule] = &[
// File I/O sinks: user-controlled paths flowing into File.open/File.new
// are a path-traversal / arbitrary-read vector. File.open also participates
// in the resource-lifecycle acquire/release pair (cfg_analysis::RUBY_RESOURCES),
// so this entry is additive it does not disturb resource-leak detection.
// so this entry is additive; it does not disturb resource-leak detection.
LabelRule {
matchers: &["File.open", "File.new", "File.read", "IO.read"],
label: DataLabel::Sink(Cap::FILE_IO),
@ -115,7 +115,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
// URI.open is the network-capable Kernel#open wrapper more specific than
// URI.open is the network-capable Kernel#open wrapper, more specific than
// plain `open` (excluded to avoid file I/O false positives).
LabelRule {
matchers: &[
@ -140,7 +140,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::DESERIALIZE),
case_sensitive: false,
},
// Reflection / dynamic class resolution arbitrary class instantiation from
// Reflection / dynamic class resolution, arbitrary class instantiation from
// user-controlled names enables gadget chains (similar risk profile to
// deserialization). Rails adds `constantize`/`safe_constantize` to String.
LabelRule {
@ -157,7 +157,7 @@ pub static RULES: &[LabelRule] = &[
// SQL injection: ActiveRecord query methods that accept raw SQL strings.
// `where` and `order` are the most common Rails SQLi vectors when called
// with string interpolation (e.g., User.where("name = '#{params[:name]}'")).
// Broad matchers verified against fixture fallout.
// Broad matchers, verified against fixture fallout.
LabelRule {
matchers: &["where", "order", "group", "having", "joins", "pluck"],
label: DataLabel::Sink(Cap::SQL_QUERY),
@ -240,7 +240,7 @@ pub static PARAM_CONFIG: ParamConfig = ParamConfig {
/// ActiveRecord query methods that the static [`RULES`] table classifies as
/// `Sink(Cap::SQL_QUERY)`; the list mirrors that rule's matchers. These are
/// SQL injection vectors only when arg 0 is a string with interpolation
/// (`#{x}`) or a non-literal identifier; the hash form (`where(id: x)`) and
/// the parameterised form (`where("a = ?", x)`) are intrinsically safe
/// because Rails escapes the values.
const AR_QUERY_METHOD_NAMES: &[&str] = &["where", "order", "group", "having", "joins", "pluck"];
@ -249,7 +249,7 @@ const AR_QUERY_METHOD_NAMES: &[&str] = &["where", "order", "group", "having", "j
/// shape-safe. Hash literals (`pair`, `hash`), symbol literals
/// (`simple_symbol`, `hash_key_symbol`), array literals (`array`), and pure
/// string literals without `#{...}` interpolation are all safe. Strings WITH
/// interpolation and identifiers / method calls are *not* in this list
/// interpolation and identifiers / method calls are *not* in this list;
/// callers must check `has_interpolation` and the kind separately.
const AR_QUERY_SAFE_ARG0_KINDS: &[&str] = &[
"pair",
@ -270,15 +270,15 @@ const AR_QUERY_SAFE_ARG0_KINDS: &[&str] = &[
/// `cfg-unguarded-sink` (sanitiser dominates the sink reflexively).
///
/// Real-world FP shapes this closes (redmine, mastodon, diaspora):
/// * `Issue.where(:id => params[:id])` hash form
/// * `Model.where(id: x, name: y)` keyword-shorthand pairs
/// * `Project.order(:created_at)` symbol literal
/// * `Issue.pluck(:id, :name)` symbol literals
/// * `Model.where("active = ?", x)` parameterised string
/// * `Issue.where(:id => params[:id])`, hash form
/// * `Model.where(id: x, name: y)`, keyword-shorthand pairs
/// * `Project.order(:created_at)`, symbol literal
/// * `Issue.pluck(:id, :name)`, symbol literals
/// * `Model.where("active = ?", x)`, parameterised string
///
/// Real-world TPs preserved:
/// * `User.where("name = '#{name}'")` string with interpolation
/// * `Model.where(some_string_var)` dynamic identifier (conservative)
/// * `User.where("name = '#{name}'")`, string with interpolation
/// * `Model.where(some_string_var)`, dynamic identifier (conservative)
pub fn ar_query_safe_shape(callee_text: &str, arg0_kind: &str, has_interpolation: bool) -> bool {
// Match the callee's last segment ("Model.where" → "where", "where" → "where").
let leaf = callee_text.rsplit(['.', ':']).next().unwrap_or(callee_text);
@ -297,7 +297,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
let mut rules = Vec::new();
if ctx.has(DetectedFramework::Rails) {
// Strong parameters permit/require sanitize user input
// Strong parameters, permit/require sanitize user input
rules.push(RuntimeLabelRule {
matchers: vec!["permit".into(), "require".into()],
label: DataLabel::Sanitizer(Cap::all()),
@ -306,7 +306,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
}
if ctx.has(DetectedFramework::Sinatra) {
// Sinatra template rendering user content flows to rendered output
// Sinatra template rendering, user content flows to rendered output
rules.push(RuntimeLabelRule {
matchers: vec!["erb".into(), "haml".into()],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
@ -323,7 +323,7 @@ mod ar_query_tests {
#[test]
fn hash_form_is_safe() {
// Model.where(:id => x) pair node directly in argument_list
// Model.where(:id => x): pair node directly in argument_list
assert!(ar_query_safe_shape("Model.where", "pair", false));
// Model.where(id: x)
assert!(ar_query_safe_shape("where", "pair", false));
@ -338,32 +338,32 @@ mod ar_query_tests {
#[test]
fn parameterised_string_is_safe() {
    // `Model.where("a = ?", x)`: arg 0 is a string literal without
    // interpolation. Both the `string` and `string_literal` kinds are checked.
    for literal_kind in ["string", "string_literal"] {
        assert!(ar_query_safe_shape("where", literal_kind, false));
    }
}
#[test]
fn interpolated_string_is_dangerous() {
    // `Model.where("a = #{x}")`: a string node WITH an interpolation child
    // must not be reported as shape-safe.
    let suppressed = ar_query_safe_shape("where", "string", true);
    assert!(!suppressed);
}
#[test]
fn dynamic_identifier_is_dangerous() {
    // `Model.where(some_var)`: arg 0 has kind `identifier`, which is not in
    // the safe list, so the call must not be suppressed.
    let suppressed = ar_query_safe_shape("where", "identifier", false);
    assert!(!suppressed);
}
#[test]
fn array_form_is_safe() {
    // `Model.pluck([:id, :name])`: uncommon but valid; an array literal as
    // arg 0 counts as shape-safe.
    let suppressed = ar_query_safe_shape("pluck", "array", false);
    assert!(suppressed);
}
#[test]
fn non_ar_method_is_never_suppressed() {
    // `find_by_sql` is a real raw-SQL sink: never suppress it, even when
    // arg 0 is a plain string literal.
    let raw_sql_suppressed = ar_query_safe_shape("find_by_sql", "string", false);
    assert!(!raw_sql_suppressed);

    // Same for `connection.execute`, even with a hash-pair first argument.
    let driver_exec_suppressed = ar_query_safe_shape("connection.execute", "pair", false);
    assert!(!driver_exec_suppressed);
}

View file

@ -168,7 +168,7 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"expression_statement" => Kind::CallWrapper,
"assignment_expression" => Kind::Assignment,
// struct expressions recurse so env::var() calls inside field
// struct expressions, recurse so env::var() calls inside field
// initialisers produce Source-labelled CFG nodes (needed for summaries).
"struct_expression" => Kind::Block,
"field_initializer_list" => Kind::Block,
@ -287,7 +287,7 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
rules
}
/// Phase C: auth-as-taint label rules for Rust. Gated by
/// Auth-as-taint label rules for Rust. Gated by
/// `config.scanner.enable_auth_as_taint`; appended to the runtime rule set
/// when the flag is enabled. These declare **sinks** (state-changing or
/// outbound operations that should not be reached by an un-checked
@ -343,10 +343,8 @@ pub fn phase_c_auth_rules() -> Vec<RuntimeLabelRule> {
case_sensitive: false,
},
// ── Sanitizers clearing Cap::UNAUTHORIZED_ID ──
// Ownership and membership guards from the auth_analysis default
// `authorization_check_names` list. Phase C consumes these via
// call-site argument sanitization (see
// `is_auth_as_taint_arg_sanitizer` in ssa_transfer).
// Ownership and membership guards consumed via call-site
// argument sanitization (see `is_auth_as_taint_arg_sanitizer`).
RuntimeLabelRule {
matchers: vec![
"check_ownership".into(),

View file

@ -86,7 +86,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sanitizer(Cap::SHELL_ESCAPE),
case_sensitive: false,
},
// he library HTML entity encoding
// he library, HTML entity encoding
LabelRule {
matchers: &["he.encode", "he.escape"],
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
@ -131,7 +131,7 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: true,
},
// ── Outbound HTTP clients modeled as destination-aware gated sinks ──
// ── Outbound HTTP clients, modeled as destination-aware gated sinks ──
// See GATED_SINKS below; rationale mirrors javascript.rs.
LabelRule {
matchers: &[
@ -206,6 +206,14 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
// ── Cross-boundary data exfiltration (DATA_EXFIL) ─────────────────────
// See javascript.rs for rationale. `xhr.send(body)` resolves to
// `HttpClient.send` via type-qualified resolution.
LabelRule {
matchers: &["HttpClient.send", "XMLHttpRequest.prototype.send"],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: false,
},
// ─────────── SQL injection sinks ─────────────
// Database drivers: mysql, mysql2, pg, better-sqlite3
LabelRule {
@ -283,7 +291,7 @@ pub static GATED_SINKS: &[SinkGate] = &[
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// ── Outbound HTTP clients (SSRF) see javascript.rs for rationale ────
// ── Outbound HTTP clients (SSRF), see javascript.rs for rationale ────
SinkGate {
callee_matcher: "fetch",
arg_index: 0,
@ -452,6 +460,24 @@ pub static GATED_SINKS: &[SinkGate] = &[
object_destination_fields: &["host", "hostname", "path", "protocol", "port", "origin"],
},
},
// ── Cross-boundary data exfiltration ──────────────────────────────────
// `fetch(input, init)`, payload-bearing fields of `init` (arg 1) flow
// into the request body / headers / json, distinct from SSRF on the URL
// (arg 0). See javascript.rs for full rationale.
SinkGate {
callee_matcher: "fetch",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &["body", "headers", "json"],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {