mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
|
|
@ -1,4 +1,4 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
|
||||
use crate::labels::{Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, SinkGate};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
|
|
@ -69,6 +69,33 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for C.
|
||||
///
|
||||
/// `curl_easy_setopt(handle, option, payload)` is libcurl's option-binding
|
||||
/// interface; the option identifier at arg 1 selects which slot the payload
|
||||
/// fills. `CURLOPT_POSTFIELDS` and `CURLOPT_COPYPOSTFIELDS` carry the
|
||||
/// request body, while other CURLOPT_* constants designate URL / auth / TLS
|
||||
/// behaviour and are not DATA_EXFIL-relevant. Gating on the macro identifier
|
||||
/// keeps the rule from over-firing on `curl_easy_setopt(h, CURLOPT_URL, url)`
|
||||
/// (covered separately by the `curl_easy_perform` SSRF flat sink).
|
||||
///
|
||||
/// Identifier-based activation is enabled via the macro-arg fallback in
|
||||
/// `cfg::mod::classify_gated_sink` for `lang == "c"`. Header-parsing
|
||||
/// libraries (e.g. libmicrohttpd, mongoose) lack a stable surface and are
|
||||
/// left to project-specific config.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
|
||||
callee_matcher: "curl_easy_setopt", // libcurl call shape: (handle, option, payload)
|
||||
arg_index: 1, // arg 1 holds the CURLOPT_* option identifier that selects the slot
|
||||
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"], // the options that carry the request body
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2], // arg 2 is the payload that fills the selected slot
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch, // NOTE(review): presumably fires only when arg 1 equals one of `dangerous_values` — confirm against `GateActivation`
|
||||
}];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
|
||||
use crate::labels::{Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, SinkGate};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
|
|
@ -91,6 +91,28 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for C++.
|
||||
///
|
||||
/// Mirror of the C gate set: `curl_easy_setopt` with `CURLOPT_POSTFIELDS` /
|
||||
/// `CURLOPT_COPYPOSTFIELDS` at arg 1 binds the request body at arg 2.
|
||||
/// Identifier-based activation is enabled via the macro-arg fallback in
|
||||
/// `cfg::mod::classify_gated_sink` for `lang == "cpp" / "c++"`. Modern C++
|
||||
/// HTTP wrappers (cpr, Boost.Beast) layer over libcurl or directly over the
|
||||
/// socket; their ergonomic surfaces differ enough that adding gates per-
|
||||
/// library is left for a follow-up driven by the corpus.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
|
||||
callee_matcher: "curl_easy_setopt", // same libcurl entry point as the C gate set
|
||||
arg_index: 1, // arg 1 holds the CURLOPT_* option identifier
|
||||
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"], // options that bind the request body
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2], // the request body is bound at arg 2
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch, // NOTE(review): presumably fires only when arg 1 equals one of `dangerous_values` — confirm against `GateActivation`
|
||||
}];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
380
src/labels/go.rs
380
src/labels/go.rs
|
|
@ -1,11 +1,13 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig, RuntimeLabelRule};
|
||||
use crate::labels::{
|
||||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||||
};
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
// ─────────── Sources ───────────
|
||||
LabelRule {
|
||||
matchers: &["os.Getenv"],
|
||||
matchers: &["os.Getenv", "os.LookupEnv", "os.Environ"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -16,8 +18,12 @@ pub static RULES: &[LabelRule] = &[
|
|||
"r.URL",
|
||||
"r.Body",
|
||||
"r.Header",
|
||||
"r.Header.Get",
|
||||
"r.Header.Values",
|
||||
"r.URL.Query",
|
||||
"r.URL.Query.Get",
|
||||
"r.Cookie",
|
||||
"r.Cookies",
|
||||
"Request.FormValue",
|
||||
"Request.URL",
|
||||
],
|
||||
|
|
@ -97,27 +103,20 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Outbound HTTP clients (SSRF) ───────────────────────────────────
|
||||
//
|
||||
// These are modeled as destination-aware gated sinks in `GATED_SINKS`
|
||||
// below. Flat Sink rules would over-flag every positional argument as
|
||||
// SSRF (so a tainted body in `http.Post(url, contentType, body)` would
|
||||
// fire SSRF on the body), and the gate machinery short-circuits when a
|
||||
// flat Sink label is already attached to the callee, blocking DATA_EXFIL
|
||||
// body-flow gates from running.
|
||||
//
|
||||
// `net.Dial` / `net.DialTimeout` keep their flat-sink modeling: the
|
||||
// first positional arg is the network address with no body / payload
|
||||
// companion, so the over-flag concern does not apply.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"http.Get",
|
||||
"http.Post",
|
||||
"http.Head",
|
||||
"http.NewRequest",
|
||||
"http.NewRequestWithContext",
|
||||
"net.Dial",
|
||||
"net.DialTimeout",
|
||||
// `http.DefaultClient` is the package-level default `*http.Client`.
|
||||
// Idiomatic Go SSRF sinks (Owncast CVE-2023-3188) use the
|
||||
// `http.DefaultClient.Get(url)` form rather than the bare
|
||||
// `http.Get(url)` helper, so the suffix-matched callee text needs
|
||||
// an explicit entry here, bare `Get/Post/Do/Head` would
|
||||
// over-match unrelated method names.
|
||||
"http.DefaultClient.Get",
|
||||
"http.DefaultClient.Post",
|
||||
"http.DefaultClient.Head",
|
||||
"http.DefaultClient.Do",
|
||||
"http.DefaultClient.PostForm",
|
||||
],
|
||||
matchers: &["net.Dial", "net.DialTimeout"],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -135,6 +134,343 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Argument-role-aware Go sinks. Two classes coexist on the outbound HTTP
|
||||
/// surface, mirroring the JS/TS modeling:
|
||||
///
|
||||
/// * SSRF on the URL-bearing position of a one-shot request (`http.Get`,
|
||||
/// `http.Post`, `http.NewRequest`, `http.DefaultClient.*`).
|
||||
/// * `Cap::DATA_EXFIL` on the body / payload position when the source is
|
||||
/// Sensitive (cookies, headers, env, db reads). Gates fire only when
|
||||
/// taint reaches the body argument, so a tainted URL alone never
|
||||
/// activates DATA_EXFIL and a tainted body alone never activates SSRF.
|
||||
///
|
||||
/// `http.NewRequest` / `http.NewRequestWithContext` carry an SSRF gate on
|
||||
/// their URL position only. In Go's two-step idiom the actual network
|
||||
/// call happens at `client.Do(req)`; body taint flows from the body
|
||||
/// argument through the returned `*http.Request` via default arg → return
|
||||
/// propagation, and then activates the `http.DefaultClient.Do` DATA_EXFIL
|
||||
/// gate below. Modeling NewRequest as a body propagator (rather than a
|
||||
/// body sink) avoids duplicate findings on the idiomatic
|
||||
/// `req, _ := http.NewRequest(...); client.Do(req)` shape.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
// ── SSRF gates (URL-bearing position) ────────────────────────────────
|
||||
// `http.Get(url)` — url is arg 0.
|
||||
SinkGate {
|
||||
callee_matcher: "http.Get",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.Head(url)` — url is arg 0.
|
||||
SinkGate {
|
||||
callee_matcher: "http.Head",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.Post(url, contentType, body)` — url is arg 0.
|
||||
SinkGate {
|
||||
callee_matcher: "http.Post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.PostForm(url, data)` — url is arg 0.
|
||||
SinkGate {
|
||||
callee_matcher: "http.PostForm",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.NewRequest(method, url, body)` — url is arg 1.
|
||||
SinkGate {
|
||||
callee_matcher: "http.NewRequest",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.NewRequestWithContext(ctx, method, url, body)` — url is arg 2.
|
||||
SinkGate {
|
||||
callee_matcher: "http.NewRequestWithContext",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.DefaultClient.Get(url)` / `.Head(url)` — url is arg 0.
|
||||
SinkGate {
|
||||
callee_matcher: "http.DefaultClient.Get",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "http.DefaultClient.Head",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.DefaultClient.Post(url, contentType, body)` — url is arg 0.
|
||||
SinkGate {
|
||||
callee_matcher: "http.DefaultClient.Post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.DefaultClient.PostForm(url, data)` — url is arg 0.
|
||||
SinkGate {
|
||||
callee_matcher: "http.DefaultClient.PostForm",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── DATA_EXFIL gates (body-bearing position) ─────────────────────────
|
||||
// `http.Post(url, contentType, body)` — body is arg 2.
|
||||
SinkGate {
|
||||
callee_matcher: "http.Post",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.PostForm(url, data)` — `data` (arg 1) is `url.Values`. Form
|
||||
// bodies serialize the same operator state cookies / headers do, so a
|
||||
// tainted Sensitive value reaching the form payload is DATA_EXFIL.
|
||||
SinkGate {
|
||||
callee_matcher: "http.PostForm",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.DefaultClient.Do(req)` — `req` (arg 0) is the `*http.Request`
|
||||
// value. Body taint introduced via either `http.NewRequest(_, _, body)`
|
||||
// (default arg → return propagation) or a later `req.Body = body` field
|
||||
// write reaches this sink through the request value.
|
||||
SinkGate {
|
||||
callee_matcher: "http.DefaultClient.Do",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.DefaultClient.PostForm(url, data)` — same as `http.PostForm`
|
||||
// but invoked through the package-level default `*http.Client`.
|
||||
SinkGate {
|
||||
callee_matcher: "http.DefaultClient.PostForm",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `http.DefaultClient.Post(url, contentType, body)` — body is arg 2.
|
||||
SinkGate {
|
||||
callee_matcher: "http.DefaultClient.Post",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── Common third-party HTTP clients ─────────────────────────────────
|
||||
//
|
||||
// `go-resty/resty`: `client.R().SetBody(body).Post(url)` style.
|
||||
// `SetBody(body)` carries the body into the chained request; the
|
||||
// network call happens at the verb method. We model the body-
|
||||
// carrying verb methods gated below (Post / Put / Patch) as
|
||||
// DATA_EXFIL gates with `payload_args: &[]` (empty), which engages
|
||||
// the receiver-tainted fallback in `collect_tainted_sink_vars`. A
|
||||
// builder receiver carrying body taint from `SetBody` activates the
|
||||
// sink without us needing a positional body arg.
|
||||
SinkGate {
|
||||
callee_matcher: "resty.Request.Post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "resty.Request.Put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "resty.Request.Patch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `imroc/req`: `req.Post(url, req.BodyJSON(payload))`, the `BodyJSON`
|
||||
// / `BodyXML` helpers wrap a tainted payload and pass it as arg 1+ of
|
||||
// the verb call. Since the helper return value carries the body
|
||||
// taint, gating the verb on its trailing args (1-3) is sufficient.
|
||||
SinkGate {
|
||||
callee_matcher: "req.Post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1, 2, 3],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "req.Put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1, 2, 3],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
|
|
@ -31,6 +31,15 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Sensitive operator state: HTTP session attributes commonly carry
|
||||
// auth tokens / CSRF tokens / signed user ids. Routed through the
|
||||
// `Cookie` source-kind heuristic so DATA_EXFIL fires when these
|
||||
// values leave the process via an outbound request body.
|
||||
LabelRule {
|
||||
matchers: &["HttpSession.getAttribute", "session.getAttribute"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["HtmlUtils.htmlEscape", "StringEscapeUtils.escapeHtml4"],
|
||||
|
|
@ -121,6 +130,79 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Outbound HTTP egress points where a Sensitive source (cookie, header,
|
||||
// env, session attribute, db read) reaching the request body / payload
|
||||
// is a cross-boundary disclosure distinct from SSRF. The flat-rule
|
||||
// model relies on default arg → return propagation through builder
|
||||
// chains: `HttpRequest.newBuilder().uri(u).POST(BodyPublishers.ofString(p)).build()`
|
||||
// smears `p`-taint into the returned request, which then activates the
|
||||
// sink at `client.send(req)`.
|
||||
//
|
||||
// Type-qualified resolution maps `restTemplate.postForObject(...)` →
|
||||
// `HttpClient.postForObject` via the JAVA_HIERARCHY (RestTemplate,
|
||||
// OkHttpClient, WebClient, CloseableHttpClient all subtype HttpClient),
|
||||
// so a single set of `HttpClient.<method>` rules covers every framework
|
||||
// in scope. Plain user input is silenced by the source-sensitivity
|
||||
// gate in `effective_sink_caps`, so this fires only on cookies / headers
|
||||
// / env / session / db.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
// java.net.http: client.send(req) consumes a request that
|
||||
// carries body-taint via BodyPublishers.ofString/ofByteArray/
|
||||
// ofInputStream through the builder chain.
|
||||
"HttpClient.send",
|
||||
"HttpClient.sendAsync",
|
||||
// Spring RestTemplate verbs that take a body / entity.
|
||||
"postForObject",
|
||||
"postForEntity",
|
||||
"RestTemplate.exchange",
|
||||
"RestTemplate.put",
|
||||
"RestTemplate.patchForObject",
|
||||
// Apache HttpClient: httpClient.execute(req) where req is an
|
||||
// HttpPost / HttpPut / HttpPatch with .setEntity(StringEntity(p)).
|
||||
// CloseableHttpClient subtypes HttpClient so type-qualified
|
||||
// resolution rewrites client.execute → HttpClient.execute.
|
||||
"HttpClient.execute",
|
||||
// Spring WebClient body-binding step:
|
||||
// webClient.post().uri(u).bodyValue(payload).retrieve().
|
||||
// bodyValue is the explicit body-bind verb; default propagation
|
||||
// carries the tainted body into the chain return so the sink
|
||||
// attaches at the body-bind site itself (no cross-call needed).
|
||||
"bodyValue",
|
||||
// Apache HttpClient body-binding: the `setEntity` step on
|
||||
// HttpPost / HttpPut / HttpPatch mutates the request rather
|
||||
// than returning the builder, so the receiver's SSA value at
|
||||
// the later `httpClient.execute(req)` does not carry body
|
||||
// taint via the default smear (which threads through return
|
||||
// values, not field mutations). Firing DATA_EXFIL at the
|
||||
// setEntity call itself catches the body-binding directly.
|
||||
// The matcher is specific enough to avoid collisions —
|
||||
// `setEntity` is Apache-HttpClient-specific.
|
||||
"setEntity",
|
||||
// OkHttp builder body-binding shortcut: when the chain
|
||||
// doesn't roll through `.post(body).build()` (e.g. a helper
|
||||
// function returns the Builder mid-chain), `RequestBody`
|
||||
// is bound via `.post(body)` / `.put(body)` / `.patch(body)`
|
||||
// / `.delete(body)` directly on the Builder. These methods
|
||||
// also exist on unrelated classes (NIO, Streams) but in the
|
||||
// OkHttp idiom the receiver type is `Request.Builder`; the
|
||||
// receiver-type widening from `Request.Builder` → HttpClient
|
||||
// isn't currently modeled, so the only option would be suffix-name
|
||||
// matchers, which carry receiver-agnostic firing risk.
|
||||
// Conservative choice: these are deliberately omitted for v1 to
|
||||
// avoid over-fire on non-OkHttp `post`/`put`/`patch` calls.
|
||||
// OkHttp two-step: client.newCall(req).execute() / .enqueue().
|
||||
// Chain normalization strips `()` between dots so the tree-
|
||||
// sitter callee text `client.newCall(req).execute` matches the
|
||||
// suffix `newCall.execute` after normalization.
|
||||
"newCall.execute",
|
||||
"newCall.enqueue",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"readObject",
|
||||
|
|
|
|||
|
|
@ -98,6 +98,26 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional forwarding wrappers: telemetry / analytics / metrics dispatch.
|
||||
// Treating these as Sanitizer(DATA_EXFIL) encodes the project convention
|
||||
// that a payload routed through a named forwarding boundary is an
|
||||
// explicit, expected egress (the developer named the function), not the
|
||||
// accidental cross-boundary leak DATA_EXFIL is meant to catch. Users who
|
||||
// do not follow this convention can override per-project via
|
||||
// [analysis.languages.javascript] custom rules; the convention is
|
||||
// documented in docs/detectors/taint.md so projects can extend it.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"serializeForUpstream",
|
||||
"forwardPayload",
|
||||
"tracker.send",
|
||||
"analytics.track",
|
||||
"metrics.report",
|
||||
"logEvent",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional project-local HTML escapers. Suffix word-boundary match
|
||||
// fires on bare calls to locally defined helpers (`function escapeHtml(x)`
|
||||
// invoked as `escapeHtml(x)`) across codebases that follow the common
|
||||
|
|
@ -128,6 +148,23 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::URL_ENCODE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Shell-exec sinks. Qualified `child_process.*` and bare destructured-
|
||||
// import forms (`exec`, `execSync`, `execFile`, ...) are both modeled as
|
||||
// flat sinks here so module-aliased call sites like `cp.exec(...)`
|
||||
// (where `cp = require('child_process')`) still fire via suffix match.
|
||||
// The bare-form FPs that motivated tightening are addressed elsewhere:
|
||||
//
|
||||
// * `container.exec(...)` (Dockerode) and `exec.start(...)` (the
|
||||
// resulting `exec` handle) — `container.exec` is excluded via the
|
||||
// EXCLUDES list below; `exec.start` is suppressed by restricting
|
||||
// `first_member_label`'s suffix-strip-and-retry to `Source` labels
|
||||
// only (see `cfg/helpers.rs`).
|
||||
// * `execSync(cmd, { env: process.env })` flagging `process.env`
|
||||
// flowing into the options arg — addressed by the
|
||||
// `=exec`/`=execSync`/`=execFile`/... gates in `GATED_SINKS` below
|
||||
// which set `payload_args: &[0]`. The cfg pass propagates a gate's
|
||||
// payload_args restriction onto the matching flat sink so only arg
|
||||
// 0 (the command string) is taint-checked at the call site.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"child_process.exec",
|
||||
|
|
@ -136,8 +173,9 @@ pub static RULES: &[LabelRule] = &[
|
|||
"child_process.execFile",
|
||||
// Bare forms from destructured imports:
|
||||
// const { exec, execSync } = require('child_process')
|
||||
// Note: bare `exec` suffix-matches RegExp.prototype.exec() too,
|
||||
// but in practice tainted data rarely flows to regexp.exec().
|
||||
// and module-aliased calls like `cp.exec(...)`. Receiver-name
|
||||
// collisions (`container.exec`, etc.) are suppressed via
|
||||
// EXCLUDES; arg-position restriction comes from the `=*` gates.
|
||||
"exec",
|
||||
"execSync",
|
||||
"execFile",
|
||||
|
|
@ -250,16 +288,22 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ORM / query builder raw-SQL entry points
|
||||
// ORM / query builder raw-SQL entry points.
|
||||
//
|
||||
// `$queryRaw` / `$executeRaw` are tagged-template forms; the SQL is
|
||||
// assembled from a template literal so taint reaching arg 0 is the
|
||||
// injection vector and modeling them as flat sinks is correct.
|
||||
//
|
||||
// `$queryRawUnsafe` / `$executeRawUnsafe` accept positional bind
|
||||
// parameters: `tx.$queryRawUnsafe(sqlTemplate, p1, p2, ...)` binds
|
||||
// p1..pN as `$1..$N` (PostgreSQL prepared-statement params) and the SQL
|
||||
// template at arg 0 is the only injection point. These are modeled as
|
||||
// gated sinks below (`payload_args: &[0]`) so taint flowing only into
|
||||
// the bind params no longer fires. `sequelize.query` and `knex.raw`
|
||||
// also accept a separate bind-params object/array but the bind-params
|
||||
// interface is non-positional in those APIs, so they stay flat for now.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"sequelize.query",
|
||||
"knex.raw",
|
||||
"$queryRaw",
|
||||
"$queryRawUnsafe",
|
||||
"$executeRaw",
|
||||
"$executeRawUnsafe",
|
||||
],
|
||||
matchers: &["sequelize.query", "knex.raw", "$queryRaw", "$executeRaw"],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
|
|
@ -295,6 +339,17 @@ pub static EXCLUDES: &[&str] = &[
|
|||
"req.session.regenerate",
|
||||
"req.session.save",
|
||||
"req.session.reload",
|
||||
// Dockerode container API: `container.exec({ Cmd: [...] })` is the
|
||||
// canonical non-shell exec path (the Cmd array is passed directly to
|
||||
// the kernel via `execve`, no shell parsing). `exec.start(...)` is
|
||||
// the follow-on stream attach. Suffix-matching the bare `exec` rule
|
||||
// would otherwise classify every `<receiver>.exec(...)` method call
|
||||
// — including these — as a SHELL_ESCAPE sink. These patterns name
|
||||
// the Dockerode SDK methods specifically; if a project happens to
|
||||
// also expose its own `container.exec` shell wrapper, override via
|
||||
// [analysis.languages.javascript] custom rules.
|
||||
"container.exec",
|
||||
"exec.start",
|
||||
];
|
||||
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
|
|
@ -577,6 +632,128 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["body", "headers", "json"],
|
||||
},
|
||||
},
|
||||
// ── Shell-exec sinks (SHELL_ESCAPE) ──────────────────────────────────
|
||||
//
|
||||
// Only arg 0 (the command string) is a shell-injection payload.
|
||||
// `options.env` / `options.cwd` / etc. at arg 1+ are not. Bare forms
|
||||
// (`exec`, `execSync`, `execFile`, `execAsync`, `execPromise`) use the
|
||||
// `=` exact-only sigil so they match the destructured-import shape
|
||||
// (`const { exec } = require('child_process'); exec(cmd)`) without
|
||||
// colliding with any `<receiver>.exec` method (Dockerode's
|
||||
// `container.exec`, `RegExp.prototype.exec`, etc.).
|
||||
// Qualified `child_process.*` forms stay as flat sinks (see RULES above);
// gates run only when no flat sink already classifies the call, so adding
// them here would never fire. The bare destructured-import forms below
// are the only place where shell-exec needs gating: `classify_all` can't
// safely register a bare `exec` rule, because suffix matching would hit
// the same `<receiver>.exec` methods listed above.
|
||||
SinkGate {
|
||||
callee_matcher: "=exec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execSync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execFile",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execAsync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execPromise",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── Prisma raw-SQL with positional bind params (SQL_QUERY) ───────────
|
||||
//
|
||||
// `tx.$queryRawUnsafe(sqlTemplate, p1, p2, ...)` binds `p1..pN` as
|
||||
// PostgreSQL `$1..$N` prepared-statement parameters; only arg 0 (the
|
||||
// SQL template) is the injection vector. Flat sinks here flagged taint
|
||||
// flowing only into bind params, which is equivalent to a parameterised
|
||||
// query and not exploitable. Suffix-match (no `=` sigil) so
|
||||
// `tx.$queryRawUnsafe`, `prisma.$queryRawUnsafe`, etc. all qualify.
|
||||
SinkGate {
|
||||
callee_matcher: "$queryRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "$executeRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -320,6 +320,11 @@ static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::
|
|||
m.insert("ts", typescript::GATED_SINKS);
|
||||
m.insert("python", python::GATED_SINKS);
|
||||
m.insert("py", python::GATED_SINKS);
|
||||
m.insert("go", go::GATED_SINKS);
|
||||
m.insert("php", php::GATED_SINKS);
|
||||
m.insert("c", c::GATED_SINKS);
|
||||
m.insert("cpp", cpp::GATED_SINKS);
|
||||
m.insert("c++", cpp::GATED_SINKS);
|
||||
m
|
||||
});
|
||||
|
||||
|
|
@ -473,6 +478,10 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
|
|||
pub enum SourceKind {
|
||||
/// Direct user input (request params, argv, stdin, form data)
|
||||
UserInput,
|
||||
/// HTTP cookie value (carries session / auth material)
|
||||
Cookie,
|
||||
/// HTTP request header (may carry auth tokens, user-agent fingerprints)
|
||||
Header,
|
||||
/// Environment variables and configuration
|
||||
EnvironmentConfig,
|
||||
/// File system reads
|
||||
|
|
@ -485,10 +494,81 @@ pub enum SourceKind {
|
|||
Unknown,
|
||||
}
|
||||
|
||||
/// How sensitive the data produced by a taint source is.
///
/// Detector classes such as `DATA_EXFIL` use this tier to fire only when
/// the source carries information the operator did not intend to leak.
/// Plain user input echoed back into an outbound request is not data
/// exfiltration: the user already controls it, so surfacing it as a leak
/// is noise.
///
/// `DATA_EXFIL` requires a tier of at least [`Sensitivity::Sensitive`];
/// plain user input is suppressed. Projects that legitimately classify a
/// request body as sensitive (e.g. an API gateway forwarding
/// pre-authenticated user tokens out of a request body) can override this
/// via custom rules in `nyx.conf`, either by re-classifying the source or
/// by adding a Sanitizer rule for `Cap::DATA_EXFIL` on the legitimate
/// forwarding path.
///
/// Variants are declared from least to most sensitive, so the derived
/// `Ord` yields `Plain < Sensitive < Secret`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Sensitivity {
    /// Attacker-controlled but not secret in itself: request bodies, query
    /// strings, form fields, argv. Echoing this into an outbound request
    /// is not data exfiltration.
    Plain,
    /// Carries operator state that should not leak out: cookies, auth
    /// headers, env, file system reads, database rows.
    Sensitive,
    /// Reserved for future explicit secret classifications (API keys,
    /// credential stores, key material). No source currently produces
    /// this, but the threshold check in `effective_sink_caps` already
    /// handles it monotonically.
    Secret,
}
|
||||
|
||||
impl SourceKind {
|
||||
/// Return the sensitivity tier this source kind belongs to. Drives the
|
||||
/// `Cap::DATA_EXFIL` cap-suppression decision in `ast.rs`.
|
||||
pub fn sensitivity(self) -> Sensitivity {
|
||||
match self {
|
||||
// Plain user-controlled input, the user already has the data,
|
||||
// surfacing it back to them via an outbound request is not a
|
||||
// disclosure.
|
||||
SourceKind::UserInput => Sensitivity::Plain,
|
||||
// Operator-bound state, leaking these via an outbound request
|
||||
// is a real cross-boundary disclosure.
|
||||
SourceKind::Cookie
|
||||
| SourceKind::Header
|
||||
| SourceKind::EnvironmentConfig
|
||||
| SourceKind::FileSystem
|
||||
| SourceKind::Database => Sensitivity::Sensitive,
|
||||
// Caught exceptions can carry stack traces, db errors, internal
|
||||
// paths, treat them as sensitive by default.
|
||||
SourceKind::CaughtException => Sensitivity::Sensitive,
|
||||
// Conservative default for unclassified sources, surface
|
||||
// findings rather than silently drop them.
|
||||
SourceKind::Unknown => Sensitivity::Sensitive,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Infer the source kind from capabilities and callee name.
|
||||
pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
||||
let cl = callee.to_ascii_lowercase();
|
||||
|
||||
// Cookie / Header are checked *before* the generic user-input bucket
|
||||
// because they imply higher sensitivity (auth material, session ids).
|
||||
// The generic UserInput substrings (`request`, `header`, `cookie`)
|
||||
// would otherwise swallow these.
|
||||
//
|
||||
// Session stores carry auth material (CSRF tokens, signed user ids) of
// the same sensitivity tier as raw cookies, so route them through the
// `Cookie` arm. NOTE: `cl` is already lowercased, so this substring
// check cannot tell the capitalised `Session` constructor apart from a
// lowercase `session` store; keeping `Session` builders out of this arm
// presumably relies on the (case-sensitive) source rules never labelling
// them as sources in the first place — TODO confirm against the callers.
|
||||
if cl.contains("cookie") || cl.contains("session") {
|
||||
return SourceKind::Cookie;
|
||||
}
|
||||
if cl.contains("header") {
|
||||
return SourceKind::Header;
|
||||
}
|
||||
|
||||
// User input patterns
|
||||
if cl.contains("argv")
|
||||
|| cl.contains("stdin")
|
||||
|
|
@ -498,11 +578,23 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
|| cl.contains("params")
|
||||
|| cl.contains("input")
|
||||
|| cl.contains("body")
|
||||
|| cl.contains("header")
|
||||
|| cl.contains("cookie")
|
||||
|| cl.contains("location")
|
||||
|| cl.contains("document.url")
|
||||
|| cl.contains("document.referrer")
|
||||
// PHP superglobals: the AST text preserves the `$` (member-text
|
||||
// extraction reads the `variable_name` node verbatim) so we match
|
||||
// both `$_POST` and the `_POST` form some collectors emit.
|
||||
// `$_REQUEST` already matches via the `request` substring above;
|
||||
// `$_COOKIE` / `$_SESSION` route through the Cookie tier earlier in
|
||||
// the function. `$_SERVER` is operator-state-bearing (auth headers
|
||||
// etc.) so it stays Sensitive by falling through to the Unknown
|
||||
// bucket.
|
||||
|| cl == "$_get"
|
||||
|| cl == "$_post"
|
||||
|| cl == "$_files"
|
||||
|| cl == "_get"
|
||||
|| cl == "_post"
|
||||
|| cl == "_files"
|
||||
{
|
||||
return SourceKind::UserInput;
|
||||
}
|
||||
|
|
@ -542,6 +634,8 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
|
||||
match kind {
|
||||
SourceKind::UserInput => crate::patterns::Severity::High,
|
||||
SourceKind::Cookie => crate::patterns::Severity::High,
|
||||
SourceKind::Header => crate::patterns::Severity::High,
|
||||
SourceKind::EnvironmentConfig => crate::patterns::Severity::High,
|
||||
SourceKind::FileSystem => crate::patterns::Severity::Medium,
|
||||
SourceKind::Database => crate::patterns::Severity::Medium,
|
||||
|
|
@ -986,11 +1080,20 @@ pub fn classify_gated_sink(
|
|||
None => return out,
|
||||
};
|
||||
|
||||
// Match against the original callee text AND a chain-normalised form
|
||||
// that strips `()` between dots so a chained construction like
|
||||
// `httpx.AsyncClient().post` matches a gate matcher of
|
||||
// `httpx.AsyncClient.post`. Mirrors the normalisation applied by
|
||||
// `classify` for flat label rules.
|
||||
let callee_bytes = callee_text.as_bytes();
|
||||
let normalized = normalize_chained_call(callee_text);
|
||||
let normalized_bytes = normalized.as_bytes();
|
||||
|
||||
for gate in *gates {
|
||||
let matcher = gate.callee_matcher.as_bytes();
|
||||
if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive) {
|
||||
if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive)
|
||||
&& !match_suffix_cs(normalized_bytes, matcher, gate.case_sensitive)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1473,26 +1576,69 @@ mod tests {
|
|||
// CVE Hunt Session 2 (Go CVE-2023-3188 Owncast SSRF):
// `http.DefaultClient.Get/Post/Head/Do/PostForm` is the idiomatic Go
// SSRF sink shape (`http.DefaultClient` is the package-level shared
// `*http.Client`). Bare `Get`/`Post` matchers would over-match
// unrelated method names; the explicit `http.DefaultClient.*` matcher
// restricts the suffix-match to the stdlib helper while leaving
// user-defined `myClient.Get` alone (no false positives).
//
// These callees migrated from a flat `Sink(SSRF)` rule to
// destination-aware gated sinks so that DATA_EXFIL gates can coexist on
// the same callee (e.g. `http.DefaultClient.Post(url, _, body)` carries
// SSRF on arg 0 and DATA_EXFIL on arg 2). The assertions below check the
// gate registration rather than the flat classifier output.
|
||||
/// `http.DefaultClient.Get` must be registered as a gated SSRF sink for Go.
///
/// The callee is modelled as a gate rather than a flat rule, so the check
/// goes through `classify_gated_sink` and looks for an SSRF-labelled match
/// instead of comparing the flat `classify` result.
#[test]
fn classify_go_http_default_client_get_is_ssrf_gate() {
    // No argument or keyword information is supplied: every lookup
    // closure reports "absent".
    let no_kw = |_: &str| None;
    let no_kw_present = |_: &str| false;
    let result = classify_gated_sink(
        "go",
        "http.DefaultClient.Get",
        |_| None,
        no_kw,
        no_kw_present,
    );
    assert!(
        result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
        "expected SSRF gate match, got {result:?}"
    );
}
|
||||
|
||||
/// `http.DefaultClient.Post` must carry two gates for Go: SSRF and
/// DATA_EXFIL.
///
/// Both labels are expected from a single `classify_gated_sink` call on
/// the same callee, so each is asserted independently.
#[test]
fn classify_go_http_default_client_post_is_ssrf_and_data_exfil_gate() {
    // No argument or keyword information is supplied: every lookup
    // closure reports "absent".
    let no_kw = |_: &str| None;
    let no_kw_present = |_: &str| false;
    let result = classify_gated_sink(
        "go",
        "http.DefaultClient.Post",
        |_| None,
        no_kw,
        no_kw_present,
    );
    assert!(
        result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
        "expected SSRF gate match, got {result:?}"
    );
    assert!(
        result
            .iter()
            .any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
        "expected DATA_EXFIL gate match, got {result:?}"
    );
}
|
||||
|
||||
/// `http.DefaultClient.Do` must be registered as a gated DATA_EXFIL sink
/// for Go.
///
/// Only the DATA_EXFIL label is asserted here; the check goes through
/// `classify_gated_sink` rather than the flat `classify` lookup.
#[test]
fn classify_go_http_default_client_do_is_data_exfil_gate() {
    // No argument or keyword information is supplied: every lookup
    // closure reports "absent".
    let no_kw = |_: &str| None;
    let no_kw_present = |_: &str| false;
    let result = classify_gated_sink(
        "go",
        "http.DefaultClient.Do",
        |_| None,
        no_kw,
        no_kw_present,
    );
    assert!(
        result
            .iter()
            .any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
        "expected DATA_EXFIL gate match, got {result:?}"
    );
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig, RuntimeLabelRule};
|
||||
use crate::labels::{
|
||||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||||
};
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
|
|
@ -138,8 +140,67 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Body-bearing outbound HTTP verb methods on the major PHP HTTP clients.
|
||||
// Flat sinks here compose with the SSRF rule on `curl_exec` /
|
||||
// `file_get_contents` via multi-label classification. The
|
||||
// source-sensitivity gate in `effective_sink_caps` strips DATA_EXFIL
|
||||
// when the contributing source is `Plain` (`$_GET`, `$_POST`, `$_REQUEST`),
|
||||
// so this only fires for sensitive sources (cookies / sessions /
|
||||
// server-side state / env / file / db reads).
|
||||
//
|
||||
// Covered clients:
|
||||
// * `Guzzle\Client::post/put/patch` — guzzlehttp/guzzle
|
||||
// matched by suffix on the verb method (chained `$client->post(...)`).
|
||||
// * `Symfony\HttpClient::request` — symfony/http-client
|
||||
// request($method, $url, ['body' => $payload, 'json' => $data, ...])
|
||||
// * `Http::post` — Laravel HTTP facade (over Guzzle)
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Client.post",
|
||||
"Client.put",
|
||||
"Client.patch",
|
||||
"Client.request",
|
||||
"HttpClient.post",
|
||||
"HttpClient.put",
|
||||
"HttpClient.patch",
|
||||
"HttpClient.request",
|
||||
"Http.post",
|
||||
"Http.put",
|
||||
"Http.patch",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for PHP.
|
||||
///
|
||||
/// `curl_setopt($ch, CURLOPT_POSTFIELDS, $payload)` is the canonical
|
||||
/// non-OO PHP HTTP-egress payload binding. The activation arg (index 1) is
|
||||
/// a `define`d constant: `CURLOPT_POSTFIELDS` (and the byref-copying variant
|
||||
/// `CURLOPT_COPYPOSTFIELDS`) carry the request body, while other CURLOPT_*
|
||||
/// constants designate URL / auth / TLS / behaviour, none of which is
|
||||
/// DATA_EXFIL-relevant. Gating on the constant identifier keeps the rule
|
||||
/// from over-firing on `curl_setopt($ch, CURLOPT_URL, $url)` (covered
|
||||
/// elsewhere by the `curl_exec` SSRF flat sink).
|
||||
///
|
||||
/// Identifier-based activation is enabled via the macro-arg fallback in
|
||||
/// `cfg::mod::classify_gated_sink` for `lang == "php"`.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
|
||||
callee_matcher: "curl_setopt",
|
||||
arg_index: 1,
|
||||
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
}];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
|
|
@ -44,6 +44,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Session stores: session cookies / DRF / Django auth carry auth material
|
||||
// the operator did not intend to leak. `infer_source_kind` maps `session`
|
||||
// callees to `SourceKind::Cookie` (Sensitive) so flowing into an outbound
|
||||
// request payload fires `DATA_EXFIL`. Case-sensitive: lowercase `session`
|
||||
// here is the Flask global / Django request attribute; the capitalised
|
||||
// `requests.Session` constructor is a client object, not a source, and
|
||||
// must not be tagged.
|
||||
//
|
||||
// The matchers cover both attribute access (`request.session.user_id`,
|
||||
// resolved as the attribute text) and the bare `session.<method>`
|
||||
// pattern that follows `from flask import session`. The `=session`
|
||||
// exact-match form fires only when the call is the bare top-level
|
||||
// `session(...)` so accidental field projections like
|
||||
// `obj.client.session` (Phase 2 chained-receiver lowering) don't get
|
||||
// mis-labelled as sources.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"request.session",
|
||||
"flask_request.session",
|
||||
"flask.session",
|
||||
"django.contrib.sessions",
|
||||
"=session",
|
||||
"session.get",
|
||||
"session.pop",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// Django-specific sources (case-sensitive to avoid request.get() dict method FP)
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
|
|
@ -208,58 +236,25 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Outbound HTTP — flat SSRF sinks for read-shaped methods (GET / HEAD)
|
||||
// that don't carry a body. Body-bearing methods (POST / PUT / PATCH /
|
||||
// DELETE / request) are modelled via destination-aware gates in
|
||||
// GATED_SINKS so SSRF activation can be narrowed to the URL position
|
||||
// and the cross-boundary `DATA_EXFIL` cap can attach to body kwargs as
|
||||
// a separate gate. `urllib.request.urlopen` stays flat: its argument
|
||||
// is a Request object whose payload-vs-URL split happens at
|
||||
// `urllib.request.Request` construction (gated below).
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"urllib.request.urlopen",
|
||||
"requests.get",
|
||||
"requests.post",
|
||||
"requests.put",
|
||||
"requests.delete",
|
||||
"requests.patch",
|
||||
"requests.head",
|
||||
"requests.request",
|
||||
"httpx.get",
|
||||
"httpx.post",
|
||||
"httpx.put",
|
||||
"httpx.delete",
|
||||
"httpx.patch",
|
||||
"httpx.head",
|
||||
"httpx.request",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// aiohttp HTTP client, SSRF sinks
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"aiohttp.get",
|
||||
"aiohttp.post",
|
||||
"aiohttp.put",
|
||||
"aiohttp.delete",
|
||||
"aiohttp.request",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Type-qualified SSRF sinks: when the receiver is tracked as
|
||||
// TypeKind::HttpClient (e.g. `client = requests.Session()`,
|
||||
// `client = httpx.Client()`, or `s = aiohttp.ClientSession()`),
|
||||
// resolve_type_qualified_labels() constructs `"HttpClient.<method>"`
|
||||
// call texts so the receiver-name is no longer load-bearing. Matches
|
||||
// the existing Rust HttpClient.<method> sink set so both languages
|
||||
// stay in step on the type-aware SSRF model. Motivated by the
|
||||
// upstream LMDeploy CVE-2026-33626 shape:
|
||||
// client = requests.Session()
|
||||
// response = client.get(url, ...)
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"aiohttp.head",
|
||||
"HttpClient.get",
|
||||
"HttpClient.post",
|
||||
"HttpClient.put",
|
||||
"HttpClient.delete",
|
||||
"HttpClient.patch",
|
||||
"HttpClient.head",
|
||||
"HttpClient.request",
|
||||
"HttpClient.send",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
|
|
@ -332,6 +327,687 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
dangerous_kwargs: &[("shell", &["True", "true"])],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// ── Outbound HTTP clients (SSRF + cross-boundary data exfiltration) ───
|
||||
//
|
||||
// Body-bearing methods (POST / PUT / PATCH / DELETE / request) are
|
||||
// gated by destination so that:
|
||||
// * SSRF fires only when taint reaches the URL position (arg 0).
|
||||
// * `DATA_EXFIL` fires only when taint reaches a body kwarg (`data` /
|
||||
// `json` / `files` for requests / aiohttp; `content` / `data` /
|
||||
// `json` / `files` for httpx).
|
||||
// The pair lets a single `requests.post(taintedUrl, data=secret)` call
|
||||
// report SSRF on the URL flow and DATA_EXFIL on the body flow as
|
||||
// independent findings rather than a conflated combined cap.
|
||||
//
|
||||
// CFG-level kwarg-aware extraction (see `extract_destination_kwarg_pairs`)
|
||||
// walks `keyword_argument` siblings and routes matching idents into the
|
||||
// gate's `destination_uses` so the SSA sink scan only fires when the
|
||||
// body kwarg itself is tainted.
|
||||
//
|
||||
// The source-sensitivity gate in `ast.rs` strips DATA_EXFIL when the
|
||||
// contributing source is `Sensitivity::Plain` (raw `request.args`,
|
||||
// `request.form`), so plain user input forwarded to a POST body does
|
||||
// not surface — only sensitive sources (cookies, sessions, env, headers)
|
||||
// produce a DATA_EXFIL finding.
|
||||
SinkGate {
|
||||
callee_matcher: "requests.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.patch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.patch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.delete",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.delete",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// requests.request(method, url, ...) — note the URL is at arg 1, not
|
||||
// arg 0; method is at arg 0. Body kwargs at arg 2+ via kwarg expansion.
|
||||
SinkGate {
|
||||
callee_matcher: "requests.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// httpx — `content` is httpx's raw-bytes body kwarg; `data` covers
|
||||
// form-encoded; `json` covers JSON-encoded; `files` covers multipart.
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.patch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.patch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.delete",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.delete",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// httpx.request(method, url, ...) — same shape as requests.request.
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// Type-qualified variants: `requests.Session()`, `httpx.Client()`,
|
||||
// `httpx.AsyncClient()`, `aiohttp.ClientSession()` instances all resolve
|
||||
// to the synthetic `HttpClient.<method>` callee text via
|
||||
// `resolve_type_qualified_labels`. Covering both module-level and
|
||||
// type-qualified forms ensures `s = requests.Session(); s.post(url, data=x)`
|
||||
// and `client = httpx.AsyncClient(); await client.post(url, json=x)` both
|
||||
// fire SSRF on the URL and DATA_EXFIL on the body kwarg.
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.patch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.patch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.delete",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.delete",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// aiohttp module-level (`aiohttp.post`, `aiohttp.put`, etc.) — uncommon
|
||||
// in real code (idiomatic usage is `async with aiohttp.ClientSession()`),
|
||||
// covered for completeness. ClientSession.<method> dispatches via the
|
||||
// type-qualified `HttpClient.<method>` gates above.
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
// Chained-construction variants: `httpx.AsyncClient().post(url, json=x)`
|
||||
// / `httpx.Client().post(url, ...)` / `aiohttp.ClientSession().post(...)`.
|
||||
// Chain-normalisation strips `()` between dots so the callee text
|
||||
// becomes `httpx.AsyncClient.post`; gate matching applies to that
|
||||
// normalised form so the chained shape is covered without binding to
|
||||
// an intermediate variable.
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.AsyncClient.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.AsyncClient.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.Client.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.Client.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.ClientSession.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.ClientSession.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.Session.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.Session.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// urllib.request.urlopen(req) — when req is a `urllib.request.Request`
|
||||
// built with the `data` kwarg, that kwarg becomes the POST body. The
|
||||
// gate fires on `Request(url, data=tainted)` directly: the constructor
|
||||
// does not egress, but the convention is that wrapping data in a Request
|
||||
// means egress is imminent (the urllib.request.Request → urlopen path).
|
||||
// This is a heuristic — the real egress happens at urlopen, but tracking
|
||||
// the data flow through the constructor is a fair static approximation.
|
||||
SinkGate {
|
||||
callee_matcher: "urllib.request.Request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,16 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Sensitive request state: cookies and session stores carry auth material
|
||||
// / CSRF tokens / signed user ids the operator did not intend to leak.
|
||||
// `infer_source_kind` routes substrings containing "cookie" or "session"
|
||||
// through `SourceKind::Cookie` (Sensitive), so flow into outbound request
|
||||
// payloads activates the `DATA_EXFIL` cap added below.
|
||||
LabelRule {
|
||||
matchers: &["request.cookies", "request.session", "cookies", "session"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["CGI.escapeHTML", "ERB::Util.html_escape"],
|
||||
|
|
@ -135,6 +145,55 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Body-bearing outbound HTTP verb methods. A flat Sink(DATA_EXFIL) here
|
||||
// composes with the SSRF rule above via multi-label classification:
|
||||
// `Net::HTTP.post(uri, payload)` reports SSRF on the URL flow (arg 0)
|
||||
// and DATA_EXFIL on the body flow (arg 1+) as separate findings. The
|
||||
// source-sensitivity gate in `effective_sink_caps` strips DATA_EXFIL
|
||||
// when the contributing source is `Plain` (raw `params`), so this only
|
||||
// fires for sensitive sources (cookies / session / env / headers /
|
||||
// file / db reads).
|
||||
//
|
||||
// Covered clients:
|
||||
// * `Net::HTTP.post(uri, data, headers)` — stdlib
|
||||
// * `Net::HTTP::Post.new(path)` body= setter — emitted as
|
||||
// `Net::HTTP::Post.body=` after Ruby setter normalisation; flat rule
|
||||
// ensures any tainted assignment to `.body` smears into the request
|
||||
// * `RestClient.post(url, payload, headers)` — rest-client gem
|
||||
// * `Faraday.post(url, body, headers)` — faraday
|
||||
// * `HTTParty.post(url, body: ..., headers: ...)` — already a Sink(SSRF)
|
||||
// above, DATA_EXFIL adds independently
|
||||
// * `Typhoeus.post(url, body: ...)` — typhoeus
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Net::HTTP.post",
|
||||
"RestClient.post",
|
||||
"RestClient.put",
|
||||
"RestClient.patch",
|
||||
"Faraday.post",
|
||||
"Faraday.put",
|
||||
"Faraday.patch",
|
||||
"HTTParty.post",
|
||||
"HTTParty.put",
|
||||
"HTTParty.patch",
|
||||
"Typhoeus.post",
|
||||
"Typhoeus.put",
|
||||
"Typhoeus.patch",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Generic outbound-method suffix matchers for chained / typed receivers
|
||||
// (e.g. `client.post(payload)` where `client` is a configured Faraday or
|
||||
// RestClient instance). Suffix-match keeps the rule compact; source
|
||||
// sensitivity gates noise from plain user input.
|
||||
LabelRule {
|
||||
matchers: &["HttpClient.post", "HttpClient.put", "HttpClient.patch"],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["Marshal.load", "Marshal.restore", "YAML.load"],
|
||||
label: DataLabel::Sink(Cap::DESERIALIZE),
|
||||
|
|
|
|||
|
|
@ -19,6 +19,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Inbound HTTP request metadata: headers, cookies, query strings,
|
||||
// and body extractors. These only carry caller-supplied bytes when
|
||||
// the framework binds them (the framework-conditional rules attach
|
||||
// the same labels for axum / actix / rocket extractors). Including
|
||||
// the bare suffix matchers here means a `req.headers().get("h")`
|
||||
// chain in non-framework code (e.g. internal helpers that take an
|
||||
// `&HeaderMap`) still surfaces as a Source. `infer_source_kind`
|
||||
// routes these to `Header` / `Cookie` (Sensitive), enabling
|
||||
// DATA_EXFIL gating downstream.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
// Type-qualified (receiver typed as HttpRequest, HeaderMap, ...)
|
||||
"HttpRequest.headers",
|
||||
"HttpRequest.cookie",
|
||||
"HttpRequest.cookies",
|
||||
"Request.headers",
|
||||
"Request.cookies",
|
||||
"Request.uri",
|
||||
// Bare HeaderMap / cookie-jar accessors.
|
||||
"headers.get",
|
||||
"headers.get_all",
|
||||
"CookieJar.get",
|
||||
"CookieJar.get_private",
|
||||
"CookieJar.get_signed",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"],
|
||||
|
|
@ -75,6 +103,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
"reqwest::Client.head",
|
||||
"reqwest::Client.patch",
|
||||
"reqwest::Client.request",
|
||||
// Chained constructor + verb form: `reqwest::Client::new()
|
||||
// .post(url)` reduces (via root-receiver collapse) to chain
|
||||
// text `Client::new.post`, so existing `Client.post` matchers
|
||||
// miss it. Cover the chained shape directly.
|
||||
"Client::new.get",
|
||||
"Client::new.post",
|
||||
"Client::new.put",
|
||||
"Client::new.delete",
|
||||
"Client::new.head",
|
||||
"Client::new.patch",
|
||||
"Client::new.request",
|
||||
// surf free verbs are themselves SSRF gates — the URL is
|
||||
// their first positional argument.
|
||||
"surf::get",
|
||||
"surf::post",
|
||||
"surf::put",
|
||||
"surf::delete",
|
||||
"surf::head",
|
||||
"surf::patch",
|
||||
"surf::connect",
|
||||
"surf::trace",
|
||||
// ureq free verbs are HTTP request initiators.
|
||||
"ureq::get",
|
||||
"ureq::post",
|
||||
"ureq::put",
|
||||
"ureq::delete",
|
||||
"ureq::patch",
|
||||
"ureq::head",
|
||||
// Type-qualified (receiver typed as HttpClient)
|
||||
"HttpClient.get",
|
||||
"HttpClient.post",
|
||||
|
|
@ -89,6 +145,68 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Cross-boundary data exfiltration sinks. Outbound HTTP egress where
|
||||
// a Sensitive source (env, header, cookie, file, db) reaching the
|
||||
// request body / payload is a leak distinct from SSRF. Plain user
|
||||
// input is silenced by the source-sensitivity gate, so these only
|
||||
// fire when the source carries operator-bound state.
|
||||
//
|
||||
// Body-binding methods on the request builder: `body`, `json`, `form`,
|
||||
// `multipart` (reqwest); `body_string`, `body_json`, `body_bytes`
|
||||
// (surf); `send_string`, `send_json`, `send_form` (ureq, which
|
||||
// combines body-bind and dispatch). Plus `.send()` on an HttpClient
|
||||
// / RequestBuilder, where the chain receiver is typed. Chain text
|
||||
// matchers like `body.send` cover the all-in-one form
|
||||
// `Client::post(url).body(payload).send()`.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
// Type-qualified terminal verbs (split form, typed receiver).
|
||||
"HttpClient.send",
|
||||
"HttpClient.execute",
|
||||
"RequestBuilder.send",
|
||||
// Type-qualified body-bind methods on a typed RequestBuilder.
|
||||
"RequestBuilder.body",
|
||||
"RequestBuilder.json",
|
||||
"RequestBuilder.form",
|
||||
"RequestBuilder.multipart",
|
||||
"RequestBuilder.body_string",
|
||||
"RequestBuilder.body_json",
|
||||
"RequestBuilder.body_bytes",
|
||||
"RequestBuilder.send_string",
|
||||
"RequestBuilder.send_json",
|
||||
"RequestBuilder.send_form",
|
||||
// surf / ureq method names that are unambiguous in Rust —
|
||||
// they only appear on HTTP request builders, so a bare-name
|
||||
// suffix matcher is safe.
|
||||
"body_string",
|
||||
"body_json",
|
||||
"body_bytes",
|
||||
"send_string",
|
||||
"send_json",
|
||||
"send_form",
|
||||
// Reqwest chain shapes. After paren-group strip the chain
|
||||
// text becomes `Client::post.body.send`, so the body-bind
|
||||
// verb sits before `.send` and a `body.send` suffix matcher
|
||||
// pins exfil-only firing to chains that actually bind a body.
|
||||
"body.send",
|
||||
"json.send",
|
||||
"form.send",
|
||||
"multipart.send",
|
||||
// hyper Request::builder().method(...).body(payload) — the
|
||||
// body-bind step is the leak point. `.unwrap` is a common
|
||||
// trailing identity method; we cover both shapes.
|
||||
"Request::builder.body",
|
||||
"Request::builder.method.body",
|
||||
"Request::builder.method.body.unwrap",
|
||||
"Request::builder.body.unwrap",
|
||||
// Two-step reqwest where the user has a dedicated `Client`
|
||||
// variable and uses `.execute(req)` on it.
|
||||
"Client::new.send",
|
||||
"Client::new.execute",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"rusqlite::Connection.execute",
|
||||
|
|
|
|||
|
|
@ -92,6 +92,22 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional forwarding wrappers, telemetry / analytics / metrics dispatch.
|
||||
// See javascript.rs for rationale; mirrored here so TypeScript projects pick
|
||||
// up the same convention. Override per-project via
|
||||
// [analysis.languages.typescript] custom rules.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"serializeForUpstream",
|
||||
"forwardPayload",
|
||||
"tracker.send",
|
||||
"analytics.track",
|
||||
"metrics.report",
|
||||
"logEvent",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional project-local HTML escapers. Suffix word-boundary match
|
||||
// fires on bare calls to locally defined helpers (`function escapeHtml(x)`
|
||||
// invoked as `escapeHtml(x)`) across codebases that follow the common
|
||||
|
|
@ -113,18 +129,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Shell-exec sinks. Qualified `child_process.*` and bare forms are both
|
||||
// flat sinks; receiver-name collisions are handled via EXCLUDES; the
|
||||
// `=*` gates in `GATED_SINKS` below restrict checked args to arg 0
|
||||
// (command string) so `execSync(cmd, { env: process.env })` no longer
|
||||
// flags `process.env` flowing into the options object. See
|
||||
// javascript.rs for full rationale.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"child_process.exec",
|
||||
"child_process.execSync",
|
||||
"child_process.spawn",
|
||||
"child_process.execFile",
|
||||
// Bare forms from destructured imports:
|
||||
// const { exec, execSync } = require('child_process')
|
||||
"exec",
|
||||
"execSync",
|
||||
"execFile",
|
||||
// Common promisified wrappers around child_process.exec
|
||||
"execAsync",
|
||||
"execPromise",
|
||||
],
|
||||
|
|
@ -227,16 +246,12 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ORM / query builder raw-SQL entry points
|
||||
// ORM / query builder raw-SQL entry points. `$queryRawUnsafe` /
|
||||
// `$executeRawUnsafe` are gated below — only arg 0 (the SQL template) is
|
||||
// the injection vector; positional bind params are bound as `$1..$N`.
|
||||
// See javascript.rs for the full rationale.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"sequelize.query",
|
||||
"knex.raw",
|
||||
"$queryRaw",
|
||||
"$queryRawUnsafe",
|
||||
"$executeRaw",
|
||||
"$executeRawUnsafe",
|
||||
],
|
||||
matchers: &["sequelize.query", "knex.raw", "$queryRaw", "$executeRaw"],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
|
|
@ -264,6 +279,9 @@ pub static EXCLUDES: &[&str] = &[
|
|||
"req.app",
|
||||
"req.route",
|
||||
"req.next",
|
||||
// Dockerode container API — see javascript.rs EXCLUDES for rationale.
|
||||
"container.exec",
|
||||
"exec.start",
|
||||
];
|
||||
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
|
|
@ -478,6 +496,113 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["body", "headers", "json"],
|
||||
},
|
||||
},
|
||||
// ── Shell-exec sinks (SHELL_ESCAPE) ──────────────────────────────────
|
||||
// See javascript.rs for the rationale. Only arg 0 (command string)
|
||||
// carries the shell-injection payload; bare forms use `=` exact-only
|
||||
// matching so they don't collide with any `<receiver>.exec` method.
|
||||
// Qualified `child_process.*` forms stay as flat sinks; gates only fire
|
||||
// when no flat sink classifies the call, so the bare destructured-import
|
||||
// forms below are the only place where shell-exec needs gating.
|
||||
SinkGate {
|
||||
callee_matcher: "=exec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execSync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execFile",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execAsync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execPromise",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── Prisma raw-SQL with positional bind params (SQL_QUERY) ───────────
|
||||
// See javascript.rs for rationale.
|
||||
SinkGate {
|
||||
callee_matcher: "$queryRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "$executeRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue