Performance and precision pass (#64)

This commit is contained in:
Eli Peter 2026-05-04 19:58:04 -04:00 committed by GitHub
parent c7c5e0f3a1
commit fb698d2c27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
97 changed files with 9932 additions and 517 deletions

View file

@ -799,6 +799,33 @@ fn phase_c_auth_rules_for_lang(lang_slug: &str) -> Vec<RuntimeLabelRule> {
}
}
/// Resolve `callee` to a *receiver-side* validator capability, if one is
/// registered for `lang`.
///
/// A receiver validator is a method call that, on success, strips a
/// capability from its receiver (and other call equivalents). This differs
/// from the `Sanitizer` label, which clears caps from the *return value*.
/// The Call transfer uses this lookup to model idioms such as
/// `path.relative_to(base)`, where the observable data-flow effect is "the
/// receiver is validated" rather than "the return value is sanitised".
///
/// # Arguments
///
/// * `lang` - language slug; only `"python"` and `"py"` have a registry.
/// * `callee` - callee text as written at the call site.
///
/// # Returns
///
/// `Some(cap)` for the first registry entry whose name matches either view
/// of the callee (see below), otherwise `None`. Languages without a registry
/// always yield `None`.
pub fn lookup_receiver_validator(lang: &str, callee: &str) -> Option<Cap> {
    // Only Python ships a receiver-validator registry; bail out early for
    // every other language before doing any string work.
    if !matches!(lang, "python" | "py") {
        return None;
    }
    let registry: &[(&str, Cap)] = python::RECEIVER_VALIDATORS;

    // View 1: the callee text cut at the first `(` or `<`, whitespace-trimmed.
    // `split` always yields at least one piece, so the fallback is defensive.
    let head = callee
        .split(['(', '<'])
        .next()
        .unwrap_or(callee)
        .trim()
        .as_bytes();

    // View 2: the callee after chained-call normalisation.
    let normalized = normalize_chained_call(callee);
    let chained = normalized.as_bytes();

    // First registry entry that matches either view wins.
    // NOTE(review): the trailing `false` is presumably the case-sensitivity
    // flag of `match_suffix_cs` — confirm against its definition.
    registry
        .iter()
        .find(|(pattern, _)| {
            let pattern = pattern.as_bytes();
            match_suffix_cs(head, pattern, false) || match_suffix_cs(chained, pattern, false)
        })
        .map(|(_, cap)| *cap)
}
/// Public re-export used by `ParsedFile::from_source` to
/// augment per-file rule sets when imports reveal frameworks that the
/// manifest-level detector missed.
@ -1471,6 +1498,26 @@ pub fn custom_rule_id(lang: &str, kind: &str, matchers: &[String]) -> String {
mod tests {
use super::*;
#[test]
fn receiver_validator_python_relative_to() {
    // Callee spellings that must resolve to the FILE_IO receiver validator:
    // the bare method name and the dotted method-call form (chained receiver).
    for callee in ["relative_to", "filepath.relative_to"] {
        assert_eq!(
            lookup_receiver_validator("python", callee),
            Some(Cap::FILE_IO),
            "expected a FILE_IO receiver validator for callee {:?}",
            callee
        );
    }

    // (language, callee) pairs that must not resolve to any validator.
    let misses = [
        // Other languages without a registry entry return None.
        ("rust", "relative_to"),
        ("javascript", "relative_to"),
        // Unrelated callees return None.
        ("python", "resolve"),
        ("python", "joinpath"),
    ];
    for (lang, callee) in misses {
        assert_eq!(
            lookup_receiver_validator(lang, callee),
            None,
            "expected no receiver validator for ({:?}, {:?})",
            lang,
            callee
        );
    }
}
#[test]
fn bare_method_name_strips_chain() {
// No-dot input → returned as-is.

View file

@ -133,10 +133,15 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: false,
},
// NOTE: `file_get_contents` can fetch URLs (SSRF vector) and local files (LFI vector).
// As a Sink(SSRF) it only fires when the argument is tainted.
// NOTE: `file_get_contents` and `fopen` can fetch URLs (SSRF vector) and
// local files (LFI vector — `file://` scheme). As a Sink(SSRF) they only
// fire when the argument is tainted. `fopen` is the canonical low-level
// stream-opening API used by media-import / OEmbed / podcast pipelines
// (CVE-2026-33486 in roadiz/documents wraps `fopen($url, 'r')` in a
// public `DownloadedFile::fromUrl` static method that any authenticated
// backend caller can drive with attacker-controlled URLs).
LabelRule {
matchers: &["file_get_contents", "curl_exec"],
matchers: &["file_get_contents", "curl_exec", "fopen"],
label: DataLabel::Sink(Cap::SSRF),
case_sensitive: false,
},
@ -232,6 +237,11 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
"anonymous_function_creation_expression" => Kind::Function,
"arrow_function" => Kind::Function,
"class_declaration" => Kind::Block,
"declaration_list" => Kind::Block,
"interface_declaration" => Kind::Block,
"trait_declaration" => Kind::Block,
"enum_declaration" => Kind::Block,
"enum_declaration_list" => Kind::Block,
// data-flow
"function_call_expression" => Kind::CallFn,

View file

@ -25,6 +25,10 @@ pub static RULES: &[LabelRule] = &[
"request.url",
"request.base_url",
"request.host",
"request.match_info",
"request.rel_url",
"request.query",
"request.path",
// Common alias: from flask import request as flask_request
"flask_request.args",
"flask_request.form",
@ -227,7 +231,15 @@ pub static RULES: &[LabelRule] = &[
case_sensitive: false,
},
LabelRule {
matchers: &["send_file", "send_from_directory"],
matchers: &[
"send_file",
"send_from_directory",
// aiohttp file response — sends file at the supplied path,
// semantically identical to Flask's send_file (CVE-2024-23334).
"FileResponse",
"web.FileResponse",
"aiohttp.web.FileResponse",
],
label: DataLabel::Sink(Cap::FILE_IO),
case_sensitive: false,
},
@ -274,6 +286,25 @@ pub static RULES: &[LabelRule] = &[
},
];
/// Method-call validators that strip caps from their *receiver* (and
/// any equivalence-class-shaped args) on success, instead of clearing
/// the return value. Distinct from `RULES`'s `Sanitizer` label, which
/// only clears the return — a poor fit for idioms whose effect is
/// raise-on-failure rather than value-replacement.
///
/// Each entry is `(callee matcher, cap stripped on success)`. The table
/// is consulted by `lookup_receiver_validator` for the `"python"` and
/// `"py"` language slugs, which returns the cap of the first entry
/// whose matcher matches the callee.
///
/// Modeled idioms:
///
/// * `path.relative_to(base)` (pathlib) — raises `ValueError` if `path`
/// is not under `base`. After a successful return, the receiver is
/// path-contained in `base`. Strips `Cap::FILE_IO`. Motivated by
/// CVE-2024-23334 (aiohttp StaticResource symlink-bypass) where the
/// patched code calls `filepath.relative_to(self._directory)` inside
/// a try/except and serves `filepath` afterwards.
pub static RECEIVER_VALIDATORS: &[(&str, Cap)] = &[
// Bare method name.
("relative_to", Cap::FILE_IO),
// Dot-prefixed spelling of the same method.
// NOTE(review): presumably present so the dotted method-call form
// (`receiver.relative_to`) matches — confirm against `match_suffix_cs`
// whether this entry is still needed alongside the bare name.
(".relative_to", Cap::FILE_IO),
];
pub static GATED_SINKS: &[SinkGate] = &[
// Legacy single-kwarg gate retained for back-compat: Popen(cmd, shell=True).
SinkGate {