new capacity bits (#67)

This commit is contained in:
Eli Peter 2026-05-07 01:29:31 -04:00 committed by GitHub
parent afaffc0df6
commit 7d0e7320e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
261 changed files with 10591 additions and 231 deletions

View file

@ -61,7 +61,7 @@ pub static RULES: &[LabelRule] = &[
// pattern that follows `from flask import session`. The `=session`
// exact-match form fires only when the call is the bare top-level
// `session(...)` so accidental field projections like
// `obj.client.session` (Phase 2 chained-receiver lowering) don't get
// `obj.client.session` (chained-receiver lowering) don't get
// mis-labelled as sources.
LabelRule {
matchers: &[
@ -284,6 +284,212 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::DESERIALIZE),
case_sensitive: false,
},
// ─── LDAP injection sinks ───
//
// python-ldap exposes module-level `ldap.search_s` / `ldap.search_ext_s`
// and method-style `conn.search_s(base, scope, filter)` after `conn =
// ldap.initialize(url)`. Suffix matching on the method names catches both
// the qualified form (`ldap.search_s`, matched as a literal) and the
// bound-receiver form (`conn.search_s` ends with `search_s`). ldap3 uses
// `Connection(server, ...)` whose `.search(...)` accepts a filter kwarg /
// positional; receiver typing tags the connection as `TypeKind::LdapClient`
// so type-qualified resolution rewrites `conn.search` → `LdapClient.search`.
LabelRule {
matchers: &[
"ldap.search_s",
"ldap.search_ext_s",
"search_s",
"search_ext_s",
"LdapClient.search",
"ldap3.Connection.search",
],
label: DataLabel::Sink(Cap::LDAP_INJECTION),
case_sensitive: true,
},
// ─── LDAP-filter sanitizers ───
//
// python-ldap: `ldap.filter.escape_filter_chars(s)` and ldap3's
// `ldap3.utils.conv.escape_filter_chars(s)` both apply RFC 4515 escaping
// to filter metacharacters. Suffix matching on `escape_filter_chars`
// covers both the fully-qualified import and the bare-name destructured
// import (`from ldap.filter import escape_filter_chars`).
LabelRule {
matchers: &[
"escape_filter_chars",
"ldap.filter.escape_filter_chars",
"ldap3.utils.conv.escape_filter_chars",
],
label: DataLabel::Sanitizer(Cap::LDAP_INJECTION),
case_sensitive: false,
},
// ─── XPath injection sinks ───
//
// lxml: `tree.xpath(expr)` / `etree.XPath(expr)` accept an
// attacker-influenceable expression string. ElementTree's
// `find` / `findall` / `findtext` accept the same kind of XPath subset
// and admit injection when the path is built by string concatenation.
// Suffix matching on the bare method names catches both
// `lxml.etree._Element.xpath(...)` and `tree.xpath(...)` shapes.
LabelRule {
matchers: &[
"xpath",
"lxml.etree.XPath",
"etree.XPath",
"ElementTree.find",
"ElementTree.findall",
"ElementTree.findtext",
],
label: DataLabel::Sink(Cap::XPATH_INJECTION),
case_sensitive: true,
},
// ─── XPath escape sanitizers ───
//
// No standard library helper escapes XPath metacharacters; project-local
// `escape_xpath` / `xpath_escape` are the developer-named equivalents.
LabelRule {
matchers: &["escape_xpath", "xpath_escape"],
label: DataLabel::Sanitizer(Cap::XPATH_INJECTION),
case_sensitive: false,
},
// ─── Header / CRLF injection sinks ───
//
// Flask / Werkzeug response APIs that write a single header value:
// `response.headers.add(name, val)`, `response.set_cookie(name, val)`,
// and the bare subscript-set form `response.headers[name] = val`.
// The subscript-set form is picked up via the LHS-subscript
// classification path in `cfg/mod.rs::push_node`: the LHS object's
// member-expression text matches `response.headers` /
// `self.response.headers` and tags the assignment as a HEADER_INJECTION
// sink.
LabelRule {
matchers: &["headers.add", "headers.set", "set_cookie"],
label: DataLabel::Sink(Cap::HEADER_INJECTION),
case_sensitive: false,
},
LabelRule {
matchers: &["response.headers", "self.response.headers", "resp.headers"],
label: DataLabel::Sink(Cap::HEADER_INJECTION),
case_sensitive: false,
},
// ─── Header / CRLF sanitizers ───
LabelRule {
matchers: &["strip_crlf", "escape_header", "sanitize_header"],
label: DataLabel::Sanitizer(Cap::HEADER_INJECTION),
case_sensitive: false,
},
// ─── Open redirect sinks ───
//
// Flask `redirect(url)`, Django `HttpResponseRedirect(url)`, FastAPI /
// Starlette `RedirectResponse(url=...)`. Tainted URL flowing to any of
// these without an allowlist check is an open-redirect vector.
LabelRule {
matchers: &[
"redirect",
"flask.redirect",
"django.shortcuts.redirect",
"HttpResponseRedirect",
"RedirectResponse",
],
label: DataLabel::Sink(Cap::OPEN_REDIRECT),
case_sensitive: true,
},
LabelRule {
matchers: &[
"validate_redirect_url",
"is_safe_redirect",
"strip_scheme",
"ensure_relative_url",
"assert_relative_path",
"is_relative_url",
],
label: DataLabel::Sanitizer(Cap::OPEN_REDIRECT),
case_sensitive: false,
},
// ─── SSTI sinks ───
//
// Template-engine constructors / `from_string` factories that accept the
// template *source string* as arg 0. `flask.render_template` takes a
// file PATH (not source) so does NOT match here — the safe API stays
// clean by name.
LabelRule {
matchers: &[
"=Template",
"jinja2.Template",
"jinja2.Environment.from_string",
"Environment.from_string",
// `compile_expression` is jinja2-specific terminology (it returns a
// callable from an inline expression source). Bare suffix lets the
// rule fire on idiomatic instance shapes (`env.compile_expression(s)`)
// without a `jinja2.Environment` TypeKind.
"compile_expression",
"mako.template.Template",
"Template.render",
],
label: DataLabel::Sink(Cap::SSTI),
case_sensitive: true,
},
// Template-loader paths: a tainted `name` lets the attacker swap the
// resolved template behind the renderer. Mako's `TemplateLookup.get_template`
// and Jinja2's `Environment.get_template` / `select_template` /
// `loader.get_source` all take a template name (path-like) as arg 0.
// Modeling these as SSTI sinks captures the loader-path attack — the
// file resolver itself becomes the gadget when the name is attacker-controlled.
LabelRule {
matchers: &[
"TemplateLookup.get_template",
"Environment.get_template",
"Environment.select_template",
"loader.get_source",
// Bare-suffix forms for the idiomatic instance shapes
// (`env.get_template(name)`, `lookup.get_template(name)`).
"get_template",
"select_template",
],
label: DataLabel::Sink(Cap::SSTI),
case_sensitive: true,
},
// ─── XXE sinks ───
//
// Python's stock `xml.sax.parseString` / `xml.sax.parse` parsers are
// XXE-vulnerable by default; `xml.dom.minidom.parseString` /
// `xml.dom.minidom.parse` likewise resolve external entities through
// the underlying expat parser unless the entity-loader is hardened.
// The `xml.dom.pulldom` equivalents are matched as well.
// Each entry is the dotted-module suffix; bare `parseString` / `parse`
// are intentionally avoided to prevent collisions with JSON parsers
// (`json.loads`). `lxml.etree.fromstring` is excluded — modern lxml
// disables external entities by default, so matching it would over-fire.
LabelRule {
matchers: &[
"xml.sax.parseString",
"xml.sax.parse",
"xml.dom.minidom.parseString",
"xml.dom.minidom.parse",
"xml.dom.pulldom.parseString",
"xml.dom.pulldom.parse",
],
label: DataLabel::Sink(Cap::XXE),
case_sensitive: true,
},
// `defusedxml.*` is the canonical hardened drop-in: every parser in
// the package strips external-entity / DTD resolution and raises on
// the patterns that would otherwise trigger XXE. Treat each defusedxml
// entry point listed below as an XXE sanitizer.
LabelRule {
matchers: &[
"defusedxml.ElementTree.fromstring",
"defusedxml.ElementTree.parse",
"defusedxml.minidom.parseString",
"defusedxml.minidom.parse",
"defusedxml.sax.parseString",
"defusedxml.sax.parse",
"defusedxml.pulldom.parseString",
"defusedxml.pulldom.parse",
"defusedxml.lxml.fromstring",
"defusedxml.lxml.parse",
],
label: DataLabel::Sanitizer(Cap::XXE),
case_sensitive: true,
},
];
/// Method-call validators that strip caps from their *receiver* (and
@ -1041,6 +1247,55 @@ pub static GATED_SINKS: &[SinkGate] = &[
},
];
/// Opt-in sink gates modelling prototype-pollution-style writes in Python.
///
/// The gates are switched on through the `NYX_PYTHON_PROTO_POLLUTION` env
/// var (see `super::env_python_proto_pollution`); when enabled they are
/// merged into the language's `GATED_REGISTRY` slice at startup.
///
/// Coverage is intentionally narrow. `dict.update(target, src)` — the form
/// that passes the target as the first argument and the source as the
/// second — is the canonical attack shape for `__class__` / `__dict__`
/// pollution in Python frameworks that thread user input through
/// configuration objects. A bound-method call such as
/// `config.update(req_data)` matches the suffix-matched `dict.update`
/// callee text only when its receiver text is literally `dict`, which keeps
/// the gate from over-firing on every `update` method in the codebase.
pub static PROTO_POLLUTION_GATES: &[SinkGate] = &[
    // Gate 1: `dict.update(target, src)`. Argument-role gating: only taint
    // on `src` (arg 1) activates the gate; a tainted target on its own is
    // benign.
    SinkGate {
        label: DataLabel::Sink(Cap::PROTOTYPE_POLLUTION),
        callee_matcher: "dict.update",
        case_sensitive: true,
        arg_index: 0,
        payload_args: &[1],
        keyword_name: None,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // Gate 2: `obj.__dict__.update(src)`, the instance-attribute pollution
    // shape. Here the sole argument (arg 0) is the payload.
    SinkGate {
        label: DataLabel::Sink(Cap::PROTOTYPE_POLLUTION),
        callee_matcher: "__dict__.update",
        case_sensitive: true,
        arg_index: 0,
        payload_args: &[0],
        keyword_name: None,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {
// control-flow
"if_statement" => Kind::If,