mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-18 20:15:14 +02:00
Precision pass on auth and resource analysis (#63)
This commit is contained in:
parent
064801a3a4
commit
c7c5e0f3a1
62 changed files with 4248 additions and 138 deletions
|
|
@ -585,13 +585,13 @@ fn has_first_char_absolute_check(clause: &str) -> bool {
|
|||
if bytes[i] == b'[' && bytes[i + 1] == b'0' && bytes[i + 2] == b']' {
|
||||
let lo = i.saturating_sub(32);
|
||||
let hi = (i + 3 + 32).min(bytes.len());
|
||||
let window = &clause[lo..hi];
|
||||
if (window.contains("==") || window.contains("!="))
|
||||
&& (window.contains("'/'")
|
||||
|| window.contains("'\\\\'")
|
||||
|| window.contains("\"/\"")
|
||||
|| window.contains("\"\\\\\""))
|
||||
{
|
||||
let window = &bytes[lo..hi];
|
||||
let has_op = window.windows(2).any(|w| w == b"==" || w == b"!=");
|
||||
let has_lit = window.windows(3).any(|w| w == b"'/'")
|
||||
|| window.windows(4).any(|w| w == b"'\\\\'")
|
||||
|| window.windows(3).any(|w| w == b"\"/\"")
|
||||
|| window.windows(4).any(|w| w == b"\"\\\\\"");
|
||||
if has_op && has_lit {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1569,6 +1569,18 @@ mod tests {
|
|||
);
|
||||
// Negative: subscript but no equality op
|
||||
assert_eq!(classify_path_rejection_atom("s[0]"), PathRejection::None);
|
||||
// Regression: multibyte char inside the 32-byte search window must not
|
||||
// panic on a non-char-boundary slice (fuzz crash repro).
|
||||
let s = format!("{}s[0] == '/'", "—".repeat(20));
|
||||
assert_eq!(
|
||||
classify_path_rejection_atom(&s),
|
||||
PathRejection::AbsoluteSlash
|
||||
);
|
||||
let s2 = format!("s[0] == '/'{}", "—".repeat(20));
|
||||
assert_eq!(
|
||||
classify_path_rejection_atom(&s2),
|
||||
PathRejection::AbsoluteSlash
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -16,12 +16,15 @@ pub struct AuthFinding {
|
|||
pub fn run_checks(model: &AuthorizationModel, rules: &AuthAnalysisRules) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
let web_signal = model.lang_web_framework_signal;
|
||||
let lang = model.lang.as_str();
|
||||
findings.extend(check_admin_routes(model, rules));
|
||||
findings.extend(check_ownership_gaps(model, rules, web_signal));
|
||||
findings.extend(check_partial_batch_authorization(model, rules, web_signal));
|
||||
findings.extend(check_stale_authorization(model, rules, web_signal));
|
||||
findings.extend(check_ownership_gaps(model, rules, web_signal, lang));
|
||||
findings.extend(check_partial_batch_authorization(
|
||||
model, rules, web_signal, lang,
|
||||
));
|
||||
findings.extend(check_stale_authorization(model, rules, web_signal, lang));
|
||||
findings.extend(check_token_override_without_validation(
|
||||
model, rules, web_signal,
|
||||
model, rules, web_signal, lang,
|
||||
));
|
||||
findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
|
||||
findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
|
||||
|
|
@ -70,11 +73,12 @@ fn check_ownership_gaps(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
for op in &unit.operations {
|
||||
|
|
@ -123,11 +127,12 @@ fn check_partial_batch_authorization(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
for op in &unit.operations {
|
||||
|
|
@ -178,11 +183,12 @@ fn check_stale_authorization(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
for op in unit.operations.iter().filter(|operation| {
|
||||
|
|
@ -226,6 +232,7 @@ fn check_token_override_without_validation(
|
|||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
lang: &str,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
|
|
@ -239,7 +246,7 @@ fn check_token_override_without_validation(
|
|||
// call shape happens to look token-y (`account.token = …;
|
||||
// account.save()`). Gate on positive user-input evidence so
|
||||
// these pure backend units are never claimed as a token flow.
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal, lang) {
|
||||
continue;
|
||||
}
|
||||
let Some(token_lookup) = unit
|
||||
|
|
@ -938,7 +945,7 @@ fn is_id_like_name(name: &str) -> bool {
|
|||
/// pure utility helpers fail all three conditions and are skipped:
|
||||
/// they cannot, by construction, be the entry point of an
|
||||
/// authentication-bearing flow.
|
||||
fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -> bool {
|
||||
fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>, lang: &str) -> bool {
|
||||
if unit.kind == AnalysisUnitKind::RouteHandler {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -960,7 +967,9 @@ fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -
|
|||
if !unit.context_inputs.is_empty() {
|
||||
return true;
|
||||
}
|
||||
unit.params.iter().any(|p| is_external_input_param_name(p))
|
||||
unit.params
|
||||
.iter()
|
||||
.any(|p| is_external_input_param_name_for_lang(p, lang))
|
||||
}
|
||||
|
||||
/// Parameter-name heuristic: does this name carry external/user input
|
||||
|
|
@ -974,7 +983,33 @@ fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -
|
|||
/// Used by `unit_has_user_input_evidence` to recognise helper
|
||||
/// functions that, while not registered as route handlers, are
|
||||
/// clearly invoked with caller-supplied identifiers or request data.
|
||||
#[cfg(test)]
fn is_external_input_param_name(name: &str) -> bool {
    // Test-only shim over the lang-aware predicate. The empty language
    // tag is not "go", so the Go-specific narrowing is never applied
    // and the generic allow-list is used.
    const GENERIC_LANG: &str = "";
    is_external_input_param_name_for_lang(name, GENERIC_LANG)
}
|
||||
|
||||
/// Lang-aware variant of [`is_external_input_param_name`]. When `lang`
|
||||
/// names a language whose framework conventions don't use the generic
|
||||
/// typed-extractor names from the JS/TS/Python ecosystems, the
|
||||
/// framework-name allow-list is narrowed accordingly.
|
||||
///
|
||||
/// Currently narrowed for Go. In Go the names `ctx` / `context` /
|
||||
/// `info` / `body` / `path` / `payload` / `dto` / `form` / `query` are
|
||||
/// not framework-request indicators — they're, respectively,
|
||||
/// `context.Context` (cancellation/value-bag from the stdlib) and a
|
||||
/// menagerie of struct-pointer payload params (`info *PackageInfo`,
|
||||
/// `opts *FooOptions`). Go's actual HTTP frameworks bind the request
|
||||
/// to a per-framework typed param (`r *http.Request`, `c *gin.Context`,
|
||||
/// `c echo.Context`, `c *fiber.Ctx`, `ctx *context.APIContext`); these
|
||||
/// arrive at the gate via `kind == RouteHandler` (set by the route
|
||||
/// extractor) or via the type-aware param filter in
|
||||
/// `extract::common::collect_param_names` (which keeps `ctx` only when
|
||||
/// its type is **not** the stdlib `context.Context`).
|
||||
///
|
||||
/// Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~1900
|
||||
/// `go.auth.missing_ownership_check` findings on backend helpers whose
|
||||
/// only "user-input evidence" was a `ctx context.Context` param name.
|
||||
fn is_external_input_param_name_for_lang(name: &str, lang: &str) -> bool {
|
||||
// Pytest / unittest.mock convention: parameters injected by
|
||||
// `@mock.patch(...)` decorators are universally named
|
||||
// `mock_<thing>` (`mock_project_id`, `mock_session`,
|
||||
|
|
@ -1011,6 +1046,13 @@ fn is_external_input_param_name(name: &str) -> bool {
|
|||
// matching on the name is a reliable proxy for the typed
|
||||
// extractor binding. Bare `c` is too common (incidental local
|
||||
// variable) to include without an additional type signal.
|
||||
if matches!(lang, "go") {
|
||||
// Go's allow-list: only `req` / `request` (the stdlib
|
||||
// `*http.Request` convention). All other names from the
|
||||
// generic allow-list have language-specific meanings in Go
|
||||
// that aren't user-input ─ see fn doc-comment above.
|
||||
return matches!(lower.as_str(), "req" | "request");
|
||||
}
|
||||
matches!(
|
||||
lower.as_str(),
|
||||
"req"
|
||||
|
|
@ -1361,23 +1403,23 @@ mod tests {
|
|||
// Function with no params and no context_inputs (Celery task
|
||||
// shape), must NOT count as user-input-bearing.
|
||||
let mut unit = empty_unit();
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Adding internal-typed params (apps, schema_editor, Django
|
||||
// migration RunPython callback shape) keeps the gate closed.
|
||||
unit.params.push("apps".into());
|
||||
unit.params.push("schema_editor".into());
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// pytest hook shape: (config, items), gate stays closed.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("config".into());
|
||||
unit.params.push("items".into());
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Adding an id-like param flips the gate open.
|
||||
unit.params.push("doc_id".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Token-named param flips the gate open (Express helper
|
||||
// `acceptInvitation(token, currentUser, roleOverride)`).
|
||||
|
|
@ -1385,23 +1427,23 @@ mod tests {
|
|||
unit.params.push("token".into());
|
||||
unit.params.push("currentUser".into());
|
||||
unit.params.push("roleOverride".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Framework request-name param flips the gate open
|
||||
// (Django/Flask `def view(request, project_id):`).
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("request".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// Axum/Actix typed-extractor convention name flips it open.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("path".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
|
||||
// RouteHandler kind always wins, regardless of params.
|
||||
let mut unit = empty_unit();
|
||||
unit.kind = AnalysisUnitKind::RouteHandler;
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
}
|
||||
|
||||
/// Web-framework signal `Some(false)` (project's manifest was
|
||||
|
|
@ -1422,9 +1464,9 @@ mod tests {
|
|||
// every desktop helper.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("session_id".into());
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true)));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false)));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true), ""));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false), ""));
|
||||
|
||||
// Step 1 (RouteHandler) still wins regardless of the gate.
|
||||
// RouteHandler kind is set by framework extractors (axum /
|
||||
|
|
@ -1432,7 +1474,7 @@ mod tests {
|
|||
// robust enough to bypass the project-level gate even when
|
||||
// the manifest doesn't name the framework.
|
||||
unit.kind = AnalysisUnitKind::RouteHandler;
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(false)));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(false), ""));
|
||||
|
||||
// context_inputs arm: bare `session.foo` on a debug-session
|
||||
// handle (not an auth session) lands in `context_inputs` via
|
||||
|
|
@ -1448,9 +1490,9 @@ mod tests {
|
|||
index: None,
|
||||
span: (0, 0),
|
||||
});
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true)));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false)));
|
||||
assert!(unit_has_user_input_evidence(&unit, None, ""));
|
||||
assert!(unit_has_user_input_evidence(&unit, Some(true), ""));
|
||||
assert!(!unit_has_user_input_evidence(&unit, Some(false), ""));
|
||||
}
|
||||
|
||||
/// `is_external_input_param_name` covers id-, token-, and
|
||||
|
|
@ -1499,6 +1541,47 @@ mod tests {
|
|||
assert!(!is_external_input_param_name("mocked_token"));
|
||||
}
|
||||
|
||||
/// Pins the Go-specific narrowing of the framework-request-name
/// allow-list.
///
/// The generic typed-extractor names borrowed from the JS/TS/Python
/// ecosystems (`info`, `path`, `payload`, `body`, `dto`, `form`,
/// `query`) are not request indicators in Go: there they are
/// struct-pointer payload params (`info *PackageInfo`), stdlib types
/// (`ctx context.Context`), or plain locals. Go HTTP frameworks bind
/// the request through per-framework typed params
/// (`r *http.Request`, `c *gin.Context`, `c echo.Context`,
/// `ctx *context.APIContext`), which reach the gate via the
/// RouteHandler kind instead.
///
/// Motivating case: roughly 1900 gitea helpers passed the gate solely
/// because of a `ctx context.Context` parameter.
#[test]
fn external_input_param_name_for_go_narrows_allowlist() {
    use super::is_external_input_param_name_for_lang as f;

    // Still accepted for Go: id-shaped and token-shaped names (these
    // fire for every language) plus the stdlib `req` / `request`
    // convention.
    for name in ["user_id", "repoID", "access_token", "req", "request"] {
        assert!(f(name, "go"), "`{name}` should be accepted for go");
    }

    // Names that Go does NOT use as a request indicator.
    for name in [
        "ctx", "context", "info", "body", "path", "payload", "dto", "form", "query",
    ] {
        assert!(!f(name, "go"), "`{name}` should be rejected for go");
    }

    // The same names DO fire for non-Go languages (Express / NestJS /
    // FastAPI / Axum extractor conventions).
    for (name, lang) in [
        ("ctx", "javascript"),
        ("body", "typescript"),
        ("path", "rust"),
        ("payload", "python"),
    ] {
        assert!(f(name, lang), "`{name}` should be accepted for {lang}");
    }
}
|
||||
|
||||
/// Row-fetch exemption.
|
||||
///
|
||||
/// Row var declared at line 10; auth check naming the row appears
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ fn collect_top_level_from_node(
|
|||
}
|
||||
}
|
||||
}
|
||||
"program" | "source_file" | "module" | "class" | "class_declaration" | "class_body"
|
||||
"program" | "source_file" | "module" | "class_declaration" | "class_body"
|
||||
| "body_statement" => {
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
|
|
@ -113,10 +113,252 @@ fn collect_top_level_from_node(
|
|||
collect_top_level_from_node(child, bytes, rules, model, file_meta);
|
||||
}
|
||||
}
|
||||
// Ruby `class Foo; ... end`. Gate method descent through the
|
||||
// visibility / callback-target filter so private helpers and
|
||||
// `before_action :foo`-style callback targets are not emitted
|
||||
// as `Function` units (the upstream cause of
|
||||
// `rb.auth.missing_ownership_check` FPs on `set_X` row-fetch
|
||||
// helpers in mastodon / diaspora controllers). Non-method
|
||||
// class-body children (nested `class` / `module` /
|
||||
// `singleton_method`) still recurse normally.
|
||||
"class" => {
|
||||
let body = node.child_by_field_name("body");
|
||||
let visibility = body
|
||||
.map(|b| ruby_method_visibility(b, bytes))
|
||||
.unwrap_or_default();
|
||||
let callbacks = body
|
||||
.map(|b| ruby_callback_target_names(b, bytes))
|
||||
.unwrap_or_default();
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
continue;
|
||||
};
|
||||
if Some(child) == body {
|
||||
for body_idx in 0..child.named_child_count() {
|
||||
let Some(grand) = child.named_child(body_idx as u32) else {
|
||||
continue;
|
||||
};
|
||||
if grand.kind() == "method" {
|
||||
let name = function_name(grand, bytes).unwrap_or_default();
|
||||
if !name.is_empty()
|
||||
&& ruby_method_is_callback_or_private(
|
||||
&name,
|
||||
&visibility,
|
||||
&callbacks,
|
||||
)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
collect_top_level_from_node(grand, bytes, rules, model, file_meta);
|
||||
}
|
||||
} else {
|
||||
collect_top_level_from_node(child, bytes, rules, model, file_meta);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Visibility level attributed to a Ruby method by
/// [`ruby_method_visibility`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RubyVisibility {
    /// Starting default for a class body, and the fallback used by
    /// [`ruby_method_is_callback_or_private`] for names missing from
    /// the visibility map.
    Public,
    /// Selected by the `protected` directive.
    Protected,
    /// Selected by the `private` directive.
    Private,
}
|
||||
|
||||
/// Walk a Ruby class body in source order and attribute each method
|
||||
/// definition's visibility, mirroring Ruby's `private` / `protected` /
|
||||
/// `public` directive semantics.
|
||||
///
|
||||
/// Two directive forms are recognised:
|
||||
/// 1. **Bare** (`private`). Tree-sitter parses these as a top-level
|
||||
/// `(identifier "private")` sibling. Toggles default visibility
|
||||
/// for every subsequent method.
|
||||
/// 2. **Targeted** (`private :foo, :bar`). Parsed as
|
||||
/// `(call method:identifier arguments:argument_list ...)`.
|
||||
/// Explicitly marks the named methods; does not change default.
|
||||
pub fn ruby_method_visibility(
|
||||
body: Node<'_>,
|
||||
bytes: &[u8],
|
||||
) -> std::collections::HashMap<String, RubyVisibility> {
|
||||
use crate::auth_analysis::config::matches_name;
|
||||
use std::collections::HashMap;
|
||||
|
||||
let mut map: HashMap<String, RubyVisibility> = HashMap::new();
|
||||
let mut current = RubyVisibility::Public;
|
||||
for child in named_children(body) {
|
||||
match child.kind() {
|
||||
"identifier" => {
|
||||
if let Some(vis) = ruby_visibility_for_directive(text(child, bytes).trim()) {
|
||||
current = vis;
|
||||
}
|
||||
}
|
||||
"call" => {
|
||||
let callee_full = call_name(child, bytes);
|
||||
let callee = bare_method_name(&callee_full);
|
||||
let Some(target_vis) = ruby_visibility_for_directive(callee) else {
|
||||
continue;
|
||||
};
|
||||
let arguments = child.child_by_field_name("arguments");
|
||||
let args: Vec<Node<'_>> = arguments
|
||||
.map(|node| named_children(node))
|
||||
.unwrap_or_default();
|
||||
if args.is_empty() {
|
||||
current = target_vis;
|
||||
continue;
|
||||
}
|
||||
let mut targeted_any = false;
|
||||
for arg in args {
|
||||
for name in ruby_symbol_names(arg, bytes) {
|
||||
if name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
map.insert(name, target_vis);
|
||||
targeted_any = true;
|
||||
}
|
||||
if arg.kind() == "method"
|
||||
&& let Some(name_node) = arg.child_by_field_name("name")
|
||||
{
|
||||
let name = text(name_node, bytes);
|
||||
if !name.is_empty() {
|
||||
map.insert(name, target_vis);
|
||||
targeted_any = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !targeted_any {
|
||||
current = target_vis;
|
||||
}
|
||||
let _ = matches_name;
|
||||
}
|
||||
"method" => {
|
||||
if let Some(name_node) = child.child_by_field_name("name") {
|
||||
let name = text(name_node, bytes);
|
||||
if !name.is_empty() {
|
||||
map.insert(name, current);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
fn ruby_visibility_for_directive(name: &str) -> Option<RubyVisibility> {
|
||||
match name {
|
||||
"private" => Some(RubyVisibility::Private),
|
||||
"protected" => Some(RubyVisibility::Protected),
|
||||
"public" => Some(RubyVisibility::Public),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect names of methods registered as Rails filter callbacks
|
||||
/// (`before_action`, `after_action`, `around_action`, with their
|
||||
/// `prepend_*` / `append_*` / `skip_*` siblings, plus the legacy
|
||||
/// `*_filter` aliases). Such methods may be public but are invoked
|
||||
/// only as part of an action's request cycle, never as standalone
|
||||
/// routes — so emitting them as units produces spurious
|
||||
/// `missing_ownership_check` flags on the helper body's row fetches.
|
||||
pub fn ruby_callback_target_names(
|
||||
body: Node<'_>,
|
||||
bytes: &[u8],
|
||||
) -> std::collections::HashSet<String> {
|
||||
use std::collections::HashSet;
|
||||
|
||||
let mut targets: HashSet<String> = HashSet::new();
|
||||
for child in named_children(body) {
|
||||
if child.kind() != "call" {
|
||||
continue;
|
||||
}
|
||||
let callee_full = call_name(child, bytes);
|
||||
let callee = bare_method_name(&callee_full);
|
||||
if !ruby_is_filter_callback_directive(callee) {
|
||||
continue;
|
||||
}
|
||||
let Some(arguments) = child.child_by_field_name("arguments") else {
|
||||
continue;
|
||||
};
|
||||
for arg in named_children(arguments) {
|
||||
if arg.kind() == "pair" {
|
||||
continue;
|
||||
}
|
||||
for name in ruby_symbol_names(arg, bytes) {
|
||||
if name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
targets.insert(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
targets
|
||||
}
|
||||
|
||||
/// Is `name` one of the Rails filter-callback directives?
///
/// The accepted names are every combination of
/// `[prepend_ | append_ | skip_]? (before | after | around) (_action | _filter)`,
/// 24 in total. Matching is exact and case-sensitive.
fn ruby_is_filter_callback_directive(name: &str) -> bool {
    // Peel the mandatory `_action` / `_filter` suffix.
    let Some(rest) = name
        .strip_suffix("_action")
        .or_else(|| name.strip_suffix("_filter"))
    else {
        return false;
    };
    // Peel at most one optional ordering / skip prefix.
    let stem = ["prepend_", "append_", "skip_"]
        .iter()
        .find_map(|prefix| rest.strip_prefix(*prefix))
        .unwrap_or(rest);
    matches!(stem, "before" | "after" | "around")
}
|
||||
|
||||
fn ruby_symbol_names(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||||
match node.kind() {
|
||||
"simple_symbol" | "hash_key_symbol" | "identifier" | "string" => {
|
||||
vec![
|
||||
strip_quotes(&text(node, bytes))
|
||||
.trim_start_matches(':')
|
||||
.to_string(),
|
||||
]
|
||||
}
|
||||
"array" => named_children(node)
|
||||
.into_iter()
|
||||
.flat_map(|child| ruby_symbol_names(child, bytes))
|
||||
.collect(),
|
||||
_ => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ruby_method_is_callback_or_private(
|
||||
name: &str,
|
||||
visibility: &std::collections::HashMap<String, RubyVisibility>,
|
||||
callbacks: &std::collections::HashSet<String>,
|
||||
) -> bool {
|
||||
let vis = visibility
|
||||
.get(name)
|
||||
.copied()
|
||||
.unwrap_or(RubyVisibility::Public);
|
||||
if vis != RubyVisibility::Public {
|
||||
return true;
|
||||
}
|
||||
callbacks.contains(name)
|
||||
}
|
||||
|
||||
fn function_unit_from_var_declarator(
|
||||
node: Node<'_>,
|
||||
bytes: &[u8],
|
||||
|
|
@ -3184,6 +3426,52 @@ fn collect_param_names(
|
|||
out.push(name);
|
||||
}
|
||||
}
|
||||
// Go `parameter_declaration` / `variadic_parameter_declaration`:
|
||||
// tree-sitter-go shape exposes `name` (one or more identifiers)
|
||||
// and `type` (the param's static type) as named fields. C/C++
|
||||
// also use `parameter_declaration` but with a `declarator`
|
||||
// field instead of `name`, so the `name`-field gate
|
||||
// distinguishes Go from C/C++ shapes without language plumbing.
|
||||
//
|
||||
// Two engine improvements at this site, both Go-specific:
|
||||
//
|
||||
// 1. Drop the entire param when its type is a known
|
||||
// non-user-input stdlib type. The dominant case is
|
||||
// `ctx context.Context`, the canonical first param of
|
||||
// nearly every Go function (cancellation / deadline /
|
||||
// value-bag, NOT an HTTP request). Without this gate the
|
||||
// bare param name `ctx` matches the framework-request-name
|
||||
// allow-list in `is_external_input_param_name`, opening
|
||||
// `unit_has_user_input_evidence` on every internal helper.
|
||||
// 2. Descend only into the `name` field so type-segment
|
||||
// identifiers don't pollute the param-name set. Without
|
||||
// this scope, `info *PackageInfo` contributes both `info`
|
||||
// and `PackageInfo` to `unit.params`; `path *Path` would
|
||||
// contribute `path` and `Path`, etc. Mirrors the Rust
|
||||
// `parameter` arm below.
|
||||
//
|
||||
// Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~1900
|
||||
// `go.auth.missing_ownership_check` findings on backend
|
||||
// helpers whose only "user-input evidence" was the ubiquitous
|
||||
// `ctx context.Context` first param.
|
||||
"parameter_declaration" | "variadic_parameter_declaration"
|
||||
if node.child_by_field_name("name").is_some() =>
|
||||
{
|
||||
if let Some(type_node) = node.child_by_field_name("type")
|
||||
&& is_go_non_user_input_type(type_node, bytes)
|
||||
{
|
||||
return;
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children_by_field_name("name", &mut cursor) {
|
||||
if child.kind() == "identifier" {
|
||||
let name = text(child, bytes);
|
||||
if !name.is_empty() && !out.contains(&name) {
|
||||
out.push(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Rust `parameter` node: descend ONLY into the `pattern` field so
|
||||
// type-segment identifiers don't pollute the param-name set.
|
||||
// Without this scope, `dst: &std::path::Path` contributes `std`,
|
||||
|
|
@ -3294,6 +3582,48 @@ fn collect_param_names(
|
|||
}
|
||||
}
|
||||
|
||||
/// Recognise Go parameter types that are categorically not user-input
|
||||
/// bearing. Used by the Go arm of [`collect_param_names`] to drop the
|
||||
/// param entirely (rather than push its name into `unit.params` and
|
||||
/// trip the framework-request-name allow-list in
|
||||
/// `is_external_input_param_name`).
|
||||
///
|
||||
/// Conservative: only matches the stdlib `context.Context` /
|
||||
/// `context.CancelFunc` interface idioms. These are the dominant
|
||||
/// cluster ─ ~1900 findings on `/Users/elipeter/oss/gitea` ─ and there
|
||||
/// is no shape under which they carry user input.
|
||||
///
|
||||
/// Implementation note: tree-sitter-go's `qualified_type` exposes
|
||||
/// `package` (identifier) and `name` (type_identifier) as named fields.
|
||||
/// Pointer-wrapping is rare for these (they're already interfaces) but
|
||||
/// is handled defensively by descending through `pointer_type`.
|
||||
fn is_go_non_user_input_type(type_node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
let mut node = type_node;
|
||||
// Strip a single layer of pointer indirection if present.
|
||||
if node.kind() == "pointer_type" {
|
||||
if let Some(inner) = node.child_by_field_name("type") {
|
||||
node = inner;
|
||||
} else if let Some(inner) = node.named_child(0) {
|
||||
node = inner;
|
||||
}
|
||||
}
|
||||
if node.kind() != "qualified_type" {
|
||||
return false;
|
||||
}
|
||||
let pkg = node
|
||||
.child_by_field_name("package")
|
||||
.map(|n| text(n, bytes))
|
||||
.unwrap_or_default();
|
||||
let name = node
|
||||
.child_by_field_name("name")
|
||||
.map(|n| text(n, bytes))
|
||||
.unwrap_or_default();
|
||||
matches!(
|
||||
(pkg.as_str(), name.as_str()),
|
||||
("context", "Context") | ("context", "CancelFunc")
|
||||
)
|
||||
}
|
||||
|
||||
/// Ascii-lowered id-shape predicate used by the Python typed-param
|
||||
/// fallback in `collect_param_names`. Mirrors
|
||||
/// `auth_analysis::checks::is_id_like_name` (cannot share that fn
|
||||
|
|
@ -4451,4 +4781,242 @@ mod tests {
|
|||
assert!(params.contains(&"b".to_string()), "got {:?}", params);
|
||||
assert!(!params.contains(&"u32".to_string()), "got {:?}", params);
|
||||
}
|
||||
|
||||
/// The stdlib `context.Context` is the canonical first parameter of
/// most Go functions, but it is NOT user input: it carries deadline /
/// cancellation / value-bag state, never an HTTP request. The Go arm
/// of `collect_param_names` drops such a parameter entirely so the
/// bare name `ctx` cannot trip the framework-request-name allow-list.
///
/// Motivating case: gitea's
/// `services/packages/packages.go::AddFileToExistingPackage` and
/// roughly 1900 sibling helpers passed
/// `unit_has_user_input_evidence` solely on this parameter.
#[test]
fn collect_param_names_go_drops_context_context_param() {
    use super::function_params;

    let src = b"package x\nfunc GetPackage(ctx context.Context, info *PackageInfo) {}\n";
    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .filter_map(|i| root.named_child(i as u32))
        .find(|n| n.kind() == "function_declaration")
        .expect("file should have a function_declaration");

    let params = function_params(func, src);
    let has = |name: &str| params.contains(&name.to_string());

    assert!(
        !has("ctx"),
        "ctx context.Context must be dropped: got {:?}",
        params
    );
    assert!(
        !has("context") && !has("Context"),
        "type-segment idents must not leak: got {:?}",
        params
    );
    assert!(
        has("info"),
        "non-context typed params keep their name: got {:?}",
        params
    );
    assert!(
        !has("PackageInfo"),
        "type-segment idents must not leak from non-context params either: got {:?}",
        params
    );
}
|
||||
|
||||
/// Per-framework request types such as `*context.APIContext` (gitea),
/// `*gin.Context`, `iris.Context` and `*fiber.Ctx` ARE user input, so
/// the type-aware filter must NOT drop them. This case pins the
/// trickiest one: the package is also called `context`, and only the
/// type name `APIContext` tells it apart from the stdlib
/// `context.Context`.
#[test]
fn collect_param_names_go_keeps_framework_context_param() {
    use super::function_params;

    let src = b"package x\nfunc Handle(ctx *context.APIContext) {}\n";
    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .filter_map(|i| root.named_child(i as u32))
        .find(|n| n.kind() == "function_declaration")
        .expect("file should have a function_declaration");

    let params = function_params(func, src);
    let ctx_kept = params.contains(&"ctx".to_string());
    assert!(
        ctx_kept,
        "framework-bearing ctx must survive: got {:?}",
        params
    );
}
|
||||
|
||||
/// A Go declaration that shares one type across several names (`a, b int`)
/// must report every name, while the type identifier itself and a stdlib
/// `ctx context.Context` parameter stay out of the result.
#[test]
fn collect_param_names_go_multi_name_param_decl() {
    use super::function_params;

    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go).unwrap();

    let src = b"package x\nfunc Add(a, b int, ctx context.Context) {}\n";
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let root = tree.root_node();
    let mut walker = root.walk();
    let func = root
        .named_children(&mut walker)
        .find(|n| n.kind() == "function_declaration")
        .expect("file should have a function_declaration");

    let params = function_params(func, src);

    // Every declared name sharing the `int` type must be present.
    for expected in ["a", "b"] {
        assert!(params.contains(&expected.to_string()), "got {:?}", params);
    }
    // The dropped context param and the type name must be absent.
    for unexpected in ["ctx", "int"] {
        assert!(!params.contains(&unexpected.to_string()), "got {:?}", params);
    }
}
|
||||
|
||||
mod ruby_visibility_and_callbacks {
|
||||
use super::super::{
|
||||
RubyVisibility, ruby_callback_target_names, ruby_method_is_callback_or_private,
|
||||
ruby_method_visibility,
|
||||
};
|
||||
use tree_sitter::{Node, Parser, Tree};
|
||||
|
||||
fn parse(src: &str) -> (Tree, Vec<u8>) {
|
||||
let mut parser = Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE))
|
||||
.unwrap();
|
||||
let bytes = src.as_bytes().to_vec();
|
||||
let tree = parser.parse(bytes.as_slice(), None).expect("parse");
|
||||
(tree, bytes)
|
||||
}
|
||||
|
||||
fn find_class_body<'a>(node: Node<'a>) -> Option<Node<'a>> {
|
||||
if node.kind() == "class" {
|
||||
return node.child_by_field_name("body");
|
||||
}
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
continue;
|
||||
};
|
||||
if let Some(body) = find_class_body(child) {
|
||||
return Some(body);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bare_private_directive_marks_subsequent_methods_private() {
|
||||
let src = "class C\n def public_a; end\n private\n def helper_b; end\n def helper_c; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("public_a").copied(), Some(RubyVisibility::Public));
|
||||
assert_eq!(vis.get("helper_b").copied(), Some(RubyVisibility::Private));
|
||||
assert_eq!(vis.get("helper_c").copied(), Some(RubyVisibility::Private));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn targeted_private_marks_only_named_methods() {
|
||||
let src = "class C\n def a; end\n def b; end\n def c; end\n private :a, :c\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private));
|
||||
assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public));
|
||||
assert_eq!(vis.get("c").copied(), Some(RubyVisibility::Private));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn public_directive_re_opens_visibility() {
|
||||
let src = "class C\n private\n def a; end\n public\n def b; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private));
|
||||
assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn protected_directive_recognised() {
|
||||
let src = "class C\n protected\n def helper; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let vis = ruby_method_visibility(body, &bytes);
|
||||
assert_eq!(vis.get("helper").copied(), Some(RubyVisibility::Protected));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn before_action_collects_callback_target_names() {
|
||||
let src = "class C\n before_action :set_account\n before_action :set_user, only: [:show, :update]\n def show; end\n def set_account; end\n def set_user; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.contains("set_account"));
|
||||
assert!(callbacks.contains("set_user"));
|
||||
// `only:` / `except:` keys must not pollute the target set.
|
||||
assert!(!callbacks.contains("show"));
|
||||
assert!(!callbacks.contains("update"));
|
||||
assert!(!callbacks.contains("only"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn before_action_block_form_yields_no_targets() {
|
||||
// Block form `before_action do ... end` carries no symbol arg.
|
||||
let src =
|
||||
"class C\n before_action do\n require_login\n end\n def show; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.is_empty(), "got {:?}", callbacks);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn skip_before_action_target_collected() {
|
||||
let src = "class C\n skip_before_action :authenticate_user!, only: [:index]\n def index; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.contains("authenticate_user!"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn legacy_before_filter_alias_collected() {
|
||||
let src = "class C\n before_filter :legacy_helper\n def legacy_helper; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(callbacks.contains("legacy_helper"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn callback_target_or_private_predicate_combines_layers() {
|
||||
// Private method → suppressed.
|
||||
// Public callback target → suppressed.
|
||||
// Public non-callback method → kept.
|
||||
let src = "class C\n before_action :set_account\n def show; end\n def set_account; end\n private\n def helper; end\nend\n";
|
||||
let (tree, bytes) = parse(src);
|
||||
let body = find_class_body(tree.root_node()).expect("body");
|
||||
let visibility = ruby_method_visibility(body, &bytes);
|
||||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||||
assert!(!ruby_method_is_callback_or_private(
|
||||
"show",
|
||||
&visibility,
|
||||
&callbacks
|
||||
));
|
||||
assert!(ruby_method_is_callback_or_private(
|
||||
"set_account",
|
||||
&visibility,
|
||||
&callbacks
|
||||
));
|
||||
assert!(ruby_method_is_callback_or_private(
|
||||
"helper",
|
||||
&visibility,
|
||||
&callbacks
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,11 +53,18 @@ pub fn extract_authorization_model(
|
|||
&actix_web::ActixWebExtractor,
|
||||
&rocket::RocketExtractor,
|
||||
];
|
||||
let mut model = AuthorizationModel::default();
|
||||
let mut model = AuthorizationModel {
|
||||
lang: lang.to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
for extractor in extractors {
|
||||
if extractor.supports(lang, framework_ctx) {
|
||||
model.extend(extractor.extract(tree, bytes, path, rules));
|
||||
let mut other = extractor.extract(tree, bytes, path, rules);
|
||||
// Preserve the canonical `lang` set above; sub-extractors
|
||||
// build their own default-initialised models with empty lang.
|
||||
other.lang = model.lang.clone();
|
||||
model.extend(other);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
auth_check_from_call_site, build_function_unit, call_name, call_site_from_node, function_name,
|
||||
named_children, span, text,
|
||||
named_children, ruby_callback_target_names, ruby_method_is_callback_or_private,
|
||||
ruby_method_visibility, span, text,
|
||||
};
|
||||
use crate::auth_analysis::config::{AuthAnalysisRules, matches_name, strip_quotes};
|
||||
use crate::auth_analysis::model::{
|
||||
|
|
@ -102,6 +103,19 @@ fn maybe_collect_controller(
|
|||
);
|
||||
let controller_segment = underscore_segment(class_name.trim_end_matches("Controller"));
|
||||
let filter_directives = class_filter_directives(body, bytes);
|
||||
// Rails routes only dispatch to public instance methods that are
|
||||
// not registered as filter callbacks. Private / protected helpers
|
||||
// and methods named in `before_action :foo` / `after_action :bar`
|
||||
// run as part of an action's request cycle but are never
|
||||
// independently routable, so emitting them as RouteHandler units
|
||||
// produces FPs (e.g. `set_account` in
|
||||
// `mastodon/app/controllers/admin/accounts_controller.rb` does
|
||||
// `Account.find(params[:id])` inside a `private` block, with the
|
||||
// actual `authorize @account` check living in the public action
|
||||
// that triggers the callback). Skip them here; the action units
|
||||
// remain under analysis with their own auth context.
|
||||
let visibility = ruby_method_visibility(body, bytes);
|
||||
let callback_targets = ruby_callback_target_names(body, bytes);
|
||||
let controller_name = format!(
|
||||
"{}{}",
|
||||
if controller_namespace.is_empty() {
|
||||
|
|
@ -122,6 +136,9 @@ fn maybe_collect_controller(
|
|||
if action_name.is_empty() || action_name.ends_with('=') {
|
||||
continue;
|
||||
}
|
||||
if ruby_method_is_callback_or_private(&action_name, &visibility, &callback_targets) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let unit_idx = model.units.len();
|
||||
let route_name = format!("{controller_name}#{action_name}");
|
||||
|
|
|
|||
|
|
@ -362,6 +362,11 @@ pub struct AuthorizationModel {
|
|||
///
|
||||
/// Currently set only for Rust by `extract_authorization_model`.
|
||||
pub lang_web_framework_signal: Option<bool>,
|
||||
/// Source language of the file the model was built from. Used by
|
||||
/// `unit_has_user_input_evidence` to apply per-language narrowing
|
||||
/// of the framework-request-name allow-list. Empty string when no
|
||||
/// language was supplied (single-file unit-test paths).
|
||||
pub lang: String,
|
||||
}
|
||||
|
||||
impl AuthorizationModel {
|
||||
|
|
|
|||
|
|
@ -1390,6 +1390,116 @@ fn rust_nested_use_as_alias() {
|
|||
assert_eq!(b.original, "Read");
|
||||
}
|
||||
|
||||
/// A variable captured through the format string's named-argument syntax
/// (`format!("{x}")`) counts as a use of `x` even though it never appears
/// as a separate AST argument. Without the lift, taint stops at the macro
/// boundary for every caller whose format string reads a tainted variable
/// by name (matrix-rust-sdk CVE-2025-53549, log!() / println!() across
/// most Rust 1.58+ codebases).
#[test]
fn rust_format_macro_named_arg_lifted_into_uses() {
    let src = b"fn f() { let x = 1; let y = format!(\"v={x}\"); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    // Every node that defines `y` must also list `x` among its uses.
    let y_defs: Vec<_> = cfg
        .node_indices()
        .filter(|&n| cfg[n].taint.defines.as_deref() == Some("y"))
        .collect();
    for &n in &y_defs {
        let uses = &cfg[n].taint.uses;
        assert!(
            uses.iter().any(|u| u == "x"),
            "expected `x` in uses for `let y = format!(\"v={{x}}\")`; got {:?}",
            uses
        );
    }
    assert!(!y_defs.is_empty(), "no node found defining `y`");
}
|
||||
|
||||
/// A format spec after the name (`{x:?}`) must not prevent `x` from being
/// lifted into the uses of the defining node.
#[test]
fn rust_format_macro_named_arg_with_format_spec() {
    let src = b"fn f() { let x = 1; let y = format!(\"{x:?}\"); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let y_defs: Vec<_> = cfg
        .node_indices()
        .filter(|&n| cfg[n].taint.defines.as_deref() == Some("y"))
        .collect();
    for &n in &y_defs {
        let uses = &cfg[n].taint.uses;
        assert!(
            uses.iter().any(|u| u == "x"),
            "expected `x` lifted past `{{x:?}}` format spec; got {:?}",
            uses
        );
    }
    assert!(!y_defs.is_empty(), "no node found defining `y`");
}
|
||||
|
||||
#[test]
fn rust_format_macro_escaped_braces_not_lifted() {
    // `{{` and `}}` are escapes for literal `{` / `}`, NOT named
    // argument captures. No identifier should be lifted from the
    // sequence between them.
    let src = b"fn f() { let q = format!(\"{{x}}\"); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let q_defs = cfg
        .node_indices()
        .filter(|&n| cfg[n].taint.defines.as_deref() == Some("q"));
    for n in q_defs {
        let uses = &cfg[n].taint.uses;
        let lifted_x = uses.iter().any(|u| u == "x");
        assert!(
            !lifted_x,
            "must not lift `x` from escaped `{{{{x}}}}`; got {:?}",
            uses
        );
    }
}
|
||||
|
||||
#[test]
fn rust_format_macro_positional_index_not_lifted() {
    // Positional placeholders like `{0}` reference args by position,
    // not by name. Don't accidentally treat a digit as an identifier.
    let src = b"fn f() { let a = 1; let q = format!(\"{0}\", a); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let q_defs = cfg
        .node_indices()
        .filter(|&n| cfg[n].taint.defines.as_deref() == Some("q"));
    for n in q_defs {
        let uses = &cfg[n].taint.uses;
        let mentions = |name: &str| uses.iter().any(|u| u == name);
        assert!(
            !mentions("0"),
            "must not lift digit-only positional placeholder; got {:?}",
            uses
        );
        assert!(
            mentions("a"),
            "expected `a` in uses (positional arg) for `format!(\"{{0}}\", a)`; got {:?}",
            uses
        );
    }
}
|
||||
|
||||
/// Named-argument lifting also applies to a statement-level `println!`:
/// the node whose callee is `println` must list `user` among its uses.
#[test]
fn rust_println_macro_named_arg_lifted() {
    let src = b"fn f() { let user = String::from(\"x\"); println!(\"hi {user}\"); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    let println_nodes: Vec<_> = cfg
        .node_indices()
        .filter(|&n| cfg[n].call.callee.as_deref() == Some("println"))
        .collect();
    for &n in &println_nodes {
        let uses = &cfg[n].taint.uses;
        assert!(
            uses.iter().any(|u| u == "user"),
            "expected `user` lifted into println! uses; got {:?}",
            uses
        );
    }
    assert!(
        !println_nodes.is_empty(),
        "no println! macro_invocation node found"
    );
}
|
||||
|
||||
#[test]
|
||||
fn go_no_import_bindings() {
|
||||
let src = b"package main\nimport alias \"fmt\"\n";
|
||||
|
|
@ -2798,6 +2908,43 @@ fn go_for_loop_back_edge() {
|
|||
assert_loop_with_back_edge(&cfg, "go for");
|
||||
}
|
||||
|
||||
/// Regression guard for the `def_use` `Kind::For` arm on Go's
/// `for ident, ident := range iter` shape. Tree-sitter nests the binding
/// pattern and the iterable inside a `range_clause` child of the
/// `for_statement` instead of exposing direct `left`/`right` fields (as
/// Python / JS do). If that clause is not consulted, the loop binding never
/// becomes a CFG def and taint from the iterable cannot reach uses of the
/// binding in the loop body.
/// Original gap: CVE-2026-41422 (daptin) goqu.L SQL injection.
#[test]
fn go_for_range_loop_binding_is_defined() {
    let src = b"package p\nfunc f(xs []string) { for _, p := range xs { use(p) } }";
    let (cfg, _) = parse_and_build(src, "go", Language::from(tree_sitter_go::LANGUAGE));

    let header = cfg
        .node_indices()
        .find(|&n| matches!(cfg[n].kind, StmtKind::Loop))
        .map(|n| &cfg[n])
        .expect("for-range loop should produce a Loop header");

    // Primary def first, then any additional defs recorded on the header.
    let mut all_defs: Vec<&str> = Vec::new();
    all_defs.extend(header.taint.defines.iter().map(String::as_str));
    all_defs.extend(header.taint.extra_defines.iter().map(String::as_str));

    assert!(
        all_defs.contains(&"p"),
        "loop binding `p` should appear in defines/extra_defines, got {:?}",
        all_defs
    );

    let reads_iterable = header.taint.uses.iter().any(|u| u == "xs");
    assert!(
        reads_iterable,
        "iterable `xs` should appear in uses, got {:?}",
        header.taint.uses
    );
}
|
||||
|
||||
#[test]
|
||||
fn ruby_while_back_edge() {
|
||||
let src = b"def f\n while cond\n body\n end\nend\n";
|
||||
|
|
|
|||
|
|
@ -83,6 +83,18 @@ pub(super) fn push_condition_node<'a>(
|
|||
let text = text_of(cond_ast, code)
|
||||
.map(|t| truncate_at_char_boundary(&t, MAX_CONDITION_TEXT_LEN).to_string());
|
||||
let span = (cond_ast.start_byte(), cond_ast.end_byte());
|
||||
// Mirror condition variables into `taint.uses` so the per-body
|
||||
// `SymbolInterner::from_cfg` pass interns them. Without this,
|
||||
// `apply_branch_predicates` (which calls `interner.get(var)` to
|
||||
// look up a Symbol id) silently no-ops on short-circuit branch
|
||||
// condition nodes — they have no `taint.uses` even though
|
||||
// `condition_vars` carries the variable names. Surfaced by
|
||||
// GHSA-h8cj-hpmg-636v: a `||`-decomposed validator like
|
||||
// `if (x == null || !regex.matcher(x).matches()) throw;` failed
|
||||
// to mark `x` as `validated_must` on the surviving branch
|
||||
// because the per-disjunct cond nodes (built via
|
||||
// `build_condition_chain`) didn't populate `taint.uses`.
|
||||
let uses_for_taint: Vec<String> = vars.clone();
|
||||
g.add_node(NodeInfo {
|
||||
kind: StmtKind::If,
|
||||
ast: AstMeta {
|
||||
|
|
@ -92,6 +104,10 @@ pub(super) fn push_condition_node<'a>(
|
|||
condition_text: text,
|
||||
condition_vars: vars,
|
||||
condition_negated: negated,
|
||||
taint: crate::cfg::TaintMeta {
|
||||
uses: uses_for_taint,
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1151,6 +1151,170 @@ pub(super) fn check_inner_call_args(node: Node, code: &[u8]) -> bool {
|
|||
true
|
||||
}
|
||||
|
||||
/// Extract identifiers captured by Rust format-string named-argument syntax
|
||||
/// (`format!("…{name}…")`, stable since 1.58) from a `macro_invocation`
|
||||
/// node. Returns the identifier names referenced by `{name}` /
|
||||
/// `{name:fmt-spec}` patterns inside the first `string_literal` child of
|
||||
/// the macro's `token_tree`.
|
||||
///
|
||||
/// Without this lifting, `let q = format!("...{x}...")` carries no `x` in
|
||||
/// its `uses` because `x` lives in the format string's bytes rather than
|
||||
/// as a separate AST argument node, so taint stops at the macro
|
||||
/// boundary. Mirrors the Python f-string interpolation lifting in
|
||||
/// `patterns/python.rs`.
|
||||
///
|
||||
/// Conservative recognition: only fires for known format-style macros
|
||||
/// (`format`, `print`/`println`, `eprint`/`eprintln`, `write`/`writeln`,
|
||||
/// `panic`, `format_args`, `assert`/`debug_assert`, the common `log`
|
||||
/// crate severity macros). Empty for any non-Rust call node, any other
|
||||
/// macro, or a token_tree whose first string is not present.
|
||||
pub(super) fn extract_rust_format_macro_named_idents(call_node: Node, code: &[u8]) -> Vec<String> {
|
||||
if call_node.kind() != "macro_invocation" {
|
||||
return Vec::new();
|
||||
}
|
||||
let Some(macro_node) = call_node.child_by_field_name("macro") else {
|
||||
return Vec::new();
|
||||
};
|
||||
let Some(macro_text) = text_of(macro_node, code) else {
|
||||
return Vec::new();
|
||||
};
|
||||
let leaf = macro_text
|
||||
.rsplit("::")
|
||||
.next()
|
||||
.unwrap_or(macro_text.as_str());
|
||||
if !is_rust_format_style_macro(leaf) {
|
||||
return Vec::new();
|
||||
}
|
||||
let tt = match call_node.child_by_field_name("token_tree") {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let mut cursor = call_node.walk();
|
||||
match call_node
|
||||
.children(&mut cursor)
|
||||
.find(|c| c.kind() == "token_tree")
|
||||
{
|
||||
Some(t) => t,
|
||||
None => return Vec::new(),
|
||||
}
|
||||
}
|
||||
};
|
||||
let mut cursor = tt.walk();
|
||||
let fmt_lit = match tt
|
||||
.children(&mut cursor)
|
||||
.find(|c| matches!(c.kind(), "string_literal" | "raw_string_literal"))
|
||||
{
|
||||
Some(n) => n,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
let raw = match text_of(fmt_lit, code) {
|
||||
Some(s) => s,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
let content = strip_literal_quotes(&raw, fmt_lit, code).unwrap_or_else(|| raw.clone());
|
||||
parse_rust_format_named_idents(&content)
|
||||
}
|
||||
|
||||
/// Walk `n` and any descendants, accumulating named-format-arg idents from
|
||||
/// every Rust `macro_invocation` reachable through structural expression
|
||||
/// children (calls, fields, await, references, blocks, ...). Lets the
|
||||
/// def-use collectors lift `format!("...{x}...")` named args through one
|
||||
/// or two levels of expression wrapping (e.g.
|
||||
/// `let q = format!("{x}").to_owned();` or RHS chained method calls).
|
||||
pub(super) fn extract_rust_format_macro_named_idents_in(n: Node, code: &[u8]) -> Vec<String> {
|
||||
let mut out = Vec::new();
|
||||
collect_format_macro_idents_recursive(n, code, &mut out, 0);
|
||||
out
|
||||
}
|
||||
|
||||
fn collect_format_macro_idents_recursive(n: Node, code: &[u8], out: &mut Vec<String>, depth: u32) {
|
||||
if depth > 6 {
|
||||
return;
|
||||
}
|
||||
if n.kind() == "macro_invocation" {
|
||||
for ident in extract_rust_format_macro_named_idents(n, code) {
|
||||
out.push(ident);
|
||||
}
|
||||
}
|
||||
let mut cursor = n.walk();
|
||||
for child in n.children(&mut cursor) {
|
||||
collect_format_macro_idents_recursive(child, code, out, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `name` (a bare macro name without path or `!`) is one
/// of the macros treated as taking a format string: the `std` formatting /
/// printing / writing macros, the panic and assertion family, and the
/// `info` / `warn` / `error` / `debug` / `trace` severity macros. The match
/// is exact and case-sensitive.
fn is_rust_format_style_macro(name: &str) -> bool {
    const FORMAT_STYLE_MACROS: &[&str] = &[
        "format",
        "print",
        "println",
        "eprint",
        "eprintln",
        "write",
        "writeln",
        "panic",
        "format_args",
        "assert",
        "debug_assert",
        "todo",
        "unimplemented",
        "unreachable",
        "info",
        "warn",
        "error",
        "debug",
        "trace",
    ];
    FORMAT_STYLE_MACROS.contains(&name)
}
|
||||
|
||||
/// Scan the text of a Rust format string and return, in order of
/// appearance, every identifier used as an inline named argument
/// (`{name}` or `{name:spec}`).
///
/// * `{{` and `}}` are escapes for literal braces and are skipped.
/// * Empty (`{}`) and positional (`{0}`) placeholders yield nothing.
/// * The `std::fmt` grammar permits whitespace between the argument and the
///   closing brace (`{name }`), so trailing ASCII whitespace is ignored when
///   the argument is terminated directly by `}`.
/// * Only ASCII identifiers are recognised (see
///   `is_valid_rust_format_ident`); repeated names are returned repeatedly.
fn parse_rust_format_named_idents(s: &str) -> Vec<String> {
    let bytes = s.as_bytes();
    let mut out: Vec<String> = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            // Escaped literal `{`.
            b'{' if bytes.get(i + 1) == Some(&b'{') => i += 2,
            b'{' => {
                let start = i + 1;
                // The argument part runs up to the format-spec separator or
                // the closing brace, whichever comes first.
                let arg_end = bytes[start..]
                    .iter()
                    .position(|&c| c == b'}' || c == b':')
                    .map_or(bytes.len(), |off| start + off);

                // `{name }` is valid: drop whitespace that sits directly in
                // front of the closing brace. Whitespace before `:` is not
                // part of the grammar, so it is left in place and rejected.
                let mut ident_end = arg_end;
                if bytes.get(arg_end) == Some(&b'}') {
                    while ident_end > start && bytes[ident_end - 1].is_ascii_whitespace() {
                        ident_end -= 1;
                    }
                }

                let ident_bytes = &bytes[start..ident_end];
                if is_valid_rust_format_ident(ident_bytes) {
                    if let Ok(name) = std::str::from_utf8(ident_bytes) {
                        out.push(name.to_string());
                    }
                }

                // Resume after the placeholder's closing brace, skipping any
                // format spec.
                let close = bytes[arg_end..]
                    .iter()
                    .position(|&c| c == b'}')
                    .map_or(bytes.len(), |off| arg_end + off);
                i = close + 1;
            }
            // Escaped literal `}`.
            b'}' if bytes.get(i + 1) == Some(&b'}') => i += 2,
            _ => i += 1,
        }
    }
    out
}

/// Returns `true` when `b` is a non-empty ASCII identifier: a letter or `_`
/// followed by letters, digits or `_`. Digit-only positional indices such as
/// `0` fail the first-byte test and are therefore rejected.
fn is_valid_rust_format_ident(b: &[u8]) -> bool {
    match b.split_first() {
        Some((&first, rest)) => {
            (first.is_ascii_alphabetic() || first == b'_')
                && rest.iter().all(|&c| c.is_ascii_alphanumeric() || c == b'_')
        }
        None => false,
    }
}
|
||||
|
||||
/// Extract per-argument identifiers from a call node's argument list.
|
||||
/// Returns one `Vec<String>` per argument (in parameter-position order).
|
||||
/// Returns empty if argument list can't be found or contains spread/keyword args.
|
||||
|
|
@ -1663,6 +1827,11 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(val, code, &mut idents, &mut paths);
|
||||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
// Rust format-string named-arg capture: `let q =
|
||||
// format!("...{x}...")` reads `x`, but `x` lives in
|
||||
// the format-string bytes, not as a separate AST
|
||||
// argument node, so collect_idents misses it.
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(val, code));
|
||||
}
|
||||
} else {
|
||||
// Try nested declarator pattern (JS/TS `lexical_declaration` → `variable_declarator`,
|
||||
|
|
@ -1716,6 +1885,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(val_node, code, &mut idents, &mut paths);
|
||||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(val_node, code));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1728,6 +1898,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(ast, code));
|
||||
}
|
||||
}
|
||||
(defs, uses, extra_defs)
|
||||
|
|
@ -1750,6 +1921,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(rhs, code, &mut idents, &mut paths);
|
||||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(rhs, code));
|
||||
}
|
||||
(defs, uses, vec![])
|
||||
}
|
||||
|
|
@ -1801,9 +1973,26 @@ pub(super) fn def_use(
|
|||
// `initializer`/`condition`/`increment`), so this path falls through
|
||||
// to the default-collecting behaviour for those, preserving today's
|
||||
// semantics.
|
||||
//
|
||||
// Go's `for ident := range iter` shape places the binding pattern
|
||||
// and iterable on a `range_clause` child of the `for_statement`
|
||||
// rather than as direct fields. Without the range_clause lookup
|
||||
// below, taint from the iterable never reaches the loop binding
|
||||
// (CVE-2026-41422 daptin: `c.QueryArray("col")` loop var `project`
|
||||
// flows into `goqu.L(project)` SQL_QUERY sink).
|
||||
Kind::For => {
|
||||
let left = ast.child_by_field_name("left");
|
||||
let right = ast.child_by_field_name("right");
|
||||
let mut left = ast.child_by_field_name("left");
|
||||
let mut right = ast.child_by_field_name("right");
|
||||
if left.is_none() && right.is_none() {
|
||||
let mut cursor = ast.walk();
|
||||
for child in ast.children(&mut cursor) {
|
||||
if child.kind() == "range_clause" {
|
||||
left = child.child_by_field_name("left");
|
||||
right = child.child_by_field_name("right");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if left.is_none() && right.is_none() {
|
||||
// C-style for, defer to default ident collection.
|
||||
let mut idents = Vec::new();
|
||||
|
|
|
|||
|
|
@ -69,6 +69,36 @@ pub(super) fn extract_param_meta<'a>(
|
|||
}
|
||||
return out;
|
||||
};
|
||||
// Java lambda shorthand: tree-sitter-java exposes the `parameters` field
|
||||
// on `lambda_expression` as either a single bare identifier (`cmd -> …`)
|
||||
// or an `inferred_parameters` wrapper around identifiers (`(a, b) -> …`).
|
||||
// Neither shape matches the formal_parameter / spread_parameter kinds in
|
||||
// PARAM_CONFIG, so the per-child loop below would otherwise see no
|
||||
// params and the lambda would appear parameterless. Without this, the
|
||||
// SSA pipeline treats the lambda binding as a free / closure-captured
|
||||
// variable, defeating the JS/TS / Java auto-seed distinction between
|
||||
// real handler-param formals and bubbled-up captures. Mirrors the JS/TS
|
||||
// arrow shorthand handled above.
|
||||
if func_node.kind() == "lambda_expression" {
|
||||
if params.kind() == "identifier" {
|
||||
if let Some(name) = text_of(params, code) {
|
||||
out.push((name, None, Vec::new()));
|
||||
return out;
|
||||
}
|
||||
} else if params.kind() == "inferred_parameters" {
|
||||
let mut cursor = params.walk();
|
||||
for child in params.named_children(&mut cursor) {
|
||||
if child.kind() == "identifier" {
|
||||
if let Some(name) = text_of(child, code) {
|
||||
out.push((name, None, Vec::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
if !out.is_empty() {
|
||||
return out;
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut cursor = params.walk();
|
||||
for child in params.children(&mut cursor) {
|
||||
// Self/this parameter (e.g. Rust's `self_parameter`)
|
||||
|
|
|
|||
|
|
@ -68,7 +68,23 @@ pub static RULES: &[LabelRule] = &[
|
|||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["db.Query", "db.Exec", "db.QueryRow", "db.Prepare"],
|
||||
matchers: &[
|
||||
"db.Query",
|
||||
"db.Exec",
|
||||
"db.QueryRow",
|
||||
"db.Prepare",
|
||||
// goqu raw SQL literal builders: `goqu.L(s)` and the alias
|
||||
// `goqu.Lit(s)` insert `s` verbatim into the generated SQL with no
|
||||
// parameterisation. CVE-2026-41422 (daptin) loops a user-controlled
|
||||
// `c.QueryArray("column")` value into `goqu.L(project)` to allow
|
||||
// arbitrary SELECT subqueries. Modelled by name — `goqu.L` is the
|
||||
// documented escape hatch for raw SQL. The safe siblings
|
||||
// `goqu.I` (identifier), `goqu.C` (column), `goqu.T` (table),
|
||||
// `goqu.V` (parameterised value), and the typed function
|
||||
// constructors (`goqu.COUNT`, `goqu.SUM`, …) are not sinks.
|
||||
"goqu.L",
|
||||
"goqu.Lit",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -538,6 +554,16 @@ pub fn framework_rules(ctx: &FrameworkContext) -> Vec<RuntimeLabelRule> {
|
|||
"c.Cookie".into(),
|
||||
"c.BindJSON".into(),
|
||||
"c.ShouldBindJSON".into(),
|
||||
// Array-returning sibling helpers. `c.QueryArray("k")` returns
|
||||
// every value of repeated query param `k`; `c.PostFormArray`
|
||||
// and `c.GetQueryArray` / `c.GetPostFormArray` are the
|
||||
// documented `[]string` counterparts of the scalar methods
|
||||
// above. CVE-2026-41422 (daptin) reads `c.QueryArray("column")`
|
||||
// and loops directly into a SQL_QUERY sink.
|
||||
"c.QueryArray".into(),
|
||||
"c.GetQueryArray".into(),
|
||||
"c.PostFormArray".into(),
|
||||
"c.GetPostFormArray".into(),
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
|
|
|
|||
|
|
@ -103,6 +103,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// JDBC `Statement.execute(String)` / `executeBatch` / `executeLargeUpdate`.
|
||||
// Bare `execute` over-fires (Runnable.run callbacks, Executor.execute,
|
||||
// HttpClient.execute), so these only fire via type-qualified resolution
|
||||
// when the receiver's TypeKind is DatabaseConnection (the kind both
|
||||
// `Connection` and `Statement` map to in `class_name_to_type_kind`).
|
||||
// Surfaced by GHSA-h8cj-hpmg-636v (Appsmith FilterDataServiceCE.dropTable).
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"DatabaseConnection.execute",
|
||||
"DatabaseConnection.executeBatch",
|
||||
"DatabaseConnection.executeLargeUpdate",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["Class.forName"],
|
||||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
|
|
|
|||
|
|
@ -1626,6 +1626,30 @@ mod tests {
|
|||
assert_eq!(result, Some(DataLabel::Sink(Cap::FILE_IO)));
|
||||
}
|
||||
|
||||
// CVE Hunt Session 6 (Go CVE-2026-41422 daptin SQL injection): goqu's
|
||||
// raw SQL literal builders `goqu.L(s)` / `goqu.Lit(s)` insert `s`
|
||||
// verbatim into the generated query. Modeled by name as SQL_QUERY
|
||||
// sinks; the safe siblings `goqu.I` (identifier), `goqu.C`, `goqu.T`,
|
||||
// `goqu.V`, `goqu.SUM`, `goqu.COUNT`, etc. are typed and stay
|
||||
// unlabeled.
|
||||
/// `goqu.L(s)` inserts `s` verbatim into the generated query, so the Go
/// label table must classify the callee as a `SQL_QUERY` sink.
#[test]
fn classify_go_goqu_l_is_sql_query_sink() {
    let expected = Some(DataLabel::Sink(Cap::SQL_QUERY));
    assert_eq!(classify("go", "goqu.L", None), expected);
}
|
||||
|
||||
/// `goqu.Lit(s)` is the long-form raw SQL literal builder and must be
/// classified as a `SQL_QUERY` sink, exactly like `goqu.L`.
#[test]
fn classify_go_goqu_lit_is_sql_query_sink() {
    let expected = Some(DataLabel::Sink(Cap::SQL_QUERY));
    assert_eq!(classify("go", "goqu.Lit", None), expected);
}
|
||||
|
||||
/// Precision guard: the typed identifier builder `goqu.I` must stay
/// unlabeled so only the raw-literal builders are reported as sinks.
#[test]
fn classify_go_goqu_i_is_not_sink() {
    assert_eq!(classify("go", "goqu.I", None), None);
}
|
||||
|
||||
// CVE Hunt Session 2 (Go CVE-2023-3188 Owncast SSRF):
|
||||
// `http.DefaultClient.Get/Post/Head/Do/PostForm` is the idiomatic Go
|
||||
// SSRF sink shape (`http.DefaultClient` is the package-level shared
|
||||
|
|
|
|||
104
src/ssa/lower.rs
104
src/ssa/lower.rs
|
|
@ -130,7 +130,7 @@ pub fn lower_to_ssa(
|
|||
scope: Option<&str>,
|
||||
scope_all: bool,
|
||||
) -> Result<SsaBody, SsaError> {
|
||||
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, &[])
|
||||
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, &[], false)
|
||||
}
|
||||
|
||||
/// Like `lower_to_ssa` but with formal parameter names supplied in declaration
|
||||
|
|
@ -144,7 +144,17 @@ pub fn lower_to_ssa_with_params(
|
|||
scope_all: bool,
|
||||
formal_params: &[String],
|
||||
) -> Result<SsaBody, SsaError> {
|
||||
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, formal_params)
|
||||
// `with_params=true` signals "callers supplied an explicit formal list,
|
||||
// even if empty" (e.g. arrow `() => {…}` has zero formals). This lets
|
||||
// the synthetic-externals classifier distinguish "no formals info" from
|
||||
// "explicit empty formals" — closure captures of an arrow with empty
|
||||
// formals are still synthetic, not formals. Bug surfaced on outline's
|
||||
// jest test files: free vars bubbled up from nested arrow callbacks
|
||||
// (`body`, `userId`, `server.post`) became Params at the outer arrow's
|
||||
// entry, and the JS/TS auto-seed treated `userId` as a real handler
|
||||
// formal, producing 934 phantom taint findings. See
|
||||
// `taint/ssa_transfer/mod.rs::auto_seed_handler_params`.
|
||||
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, formal_params, true)
|
||||
}
|
||||
|
||||
/// Like `lower_to_ssa` but with `scope_nop`: when true, all nodes are included
|
||||
|
|
@ -156,7 +166,7 @@ pub fn lower_to_ssa_scoped_nop(
|
|||
entry: NodeIndex,
|
||||
scope: Option<&str>,
|
||||
) -> Result<SsaBody, SsaError> {
|
||||
lower_to_ssa_inner(cfg, entry, scope, false, true, &[])
|
||||
lower_to_ssa_inner(cfg, entry, scope, false, true, &[], false)
|
||||
}
|
||||
|
||||
fn lower_to_ssa_inner(
|
||||
|
|
@ -166,6 +176,7 @@ fn lower_to_ssa_inner(
|
|||
scope_all: bool,
|
||||
scope_nop: bool,
|
||||
formal_params: &[String],
|
||||
with_params: bool,
|
||||
) -> Result<SsaBody, SsaError> {
|
||||
if cfg.node_count() == 0 {
|
||||
return Err(SsaError::EmptyCfg);
|
||||
|
|
@ -256,6 +267,7 @@ fn lower_to_ssa_inner(
|
|||
&filtered_edges,
|
||||
&external_vars,
|
||||
formal_params,
|
||||
with_params,
|
||||
&nop_nodes,
|
||||
);
|
||||
|
||||
|
|
@ -936,6 +948,7 @@ fn rename_variables(
|
|||
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
||||
external_vars: &[String],
|
||||
formal_params: &[String],
|
||||
with_params: bool,
|
||||
nop_nodes: &HashSet<NodeIndex>,
|
||||
) -> (
|
||||
Vec<SsaBlock>,
|
||||
|
|
@ -1698,18 +1711,21 @@ fn rename_variables(
|
|||
// handler-name auto-seed in particular) can avoid treating closure
|
||||
// captures as if they were parameters of the function under analysis.
|
||||
//
|
||||
// **Conservative behaviour when `formal_params` is empty.** Several
|
||||
// call sites (`lower_to_ssa`, `lower_to_ssa_scoped_nop`) don't supply
|
||||
// formal parameter names; in that case we cannot distinguish formals
|
||||
// from free vars structurally, so we leave `synthetic_externals` empty
|
||||
// and the auto-seed pass keeps its pre-fix behaviour of treating every
|
||||
// `Param` op as a candidate. Only callers that pass a non-empty
|
||||
// `formal_params` slice (`lower_to_ssa_with_params`, used by the
|
||||
// findings pipeline's per-function lowering) opt into the
|
||||
// closure-capture distinction.
|
||||
// **Conservative behaviour when the caller didn't supply formal-param
|
||||
// info.** Several call sites (`lower_to_ssa`, `lower_to_ssa_scoped_nop`)
|
||||
// don't supply formal parameter names; in that case we cannot distinguish
|
||||
// formals from free vars structurally, so we leave `synthetic_externals`
|
||||
// empty and the auto-seed pass keeps its pre-fix behaviour of treating
|
||||
// every `Param` op as a candidate. Callers that opt in via
|
||||
// `lower_to_ssa_with_params` set `with_params=true`, signalling that
|
||||
// `formal_params` is the authoritative formal list — even when empty
|
||||
// (arrow `() => {…}`). In that case every external becomes synthetic
|
||||
// unless it appears in `formal_params`, so the auto-seed pass cannot
|
||||
// mistake a bubbled-up free var (like `userId` lifted from a nested
|
||||
// jest test callback) for a formal of the outer body.
|
||||
let mut synthetic_externals: HashSet<SsaValue> = HashSet::new();
|
||||
let formal_set: HashSet<&str> = formal_params.iter().map(|s| s.as_str()).collect();
|
||||
let track_synthetic = !formal_params.is_empty();
|
||||
let track_synthetic = with_params;
|
||||
if !external_vars.is_empty() {
|
||||
let entry_cfg_node = blocks_nodes[0][0];
|
||||
let mut synthetic_body = Vec::with_capacity(external_vars.len());
|
||||
|
|
@ -3904,6 +3920,68 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// REGRESSION guard for the formal-vs-capture split in SSA lowering.
///
/// Lowers a three-node CFG (entry -> use of `userId` -> exit) scoped to
/// `lookup`, with `userId` supplied as the only formal parameter, and
/// checks that the resulting `Param` for `userId` is absent from
/// `synthetic_externals`. The JS/TS / Java auto-seed pass relies on this
/// distinction to seed only real formals as `Source(UserInput)` and skip
/// closure captures. The empty-formals shape (arrow `() => {…}` lifting
/// bubbled-up free vars as synthetic) is covered by the integration
/// fixture `tests/fixtures/fp_guards/framework_jest_test_callback_arrow/`,
/// which exercises the full CFG construction path that this unit test
/// cannot reproduce in isolation.
#[test]
fn arrow_with_handler_formal_keeps_param_non_synthetic() {
    // Every node in this CFG belongs to the same enclosing function.
    let lookup_ast = || crate::cfg::AstMeta {
        enclosing_func: Some("lookup".into()),
        ..Default::default()
    };

    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(NodeInfo {
        ast: lookup_ast(),
        ..make_node(StmtKind::Entry)
    });
    let use_node = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["userId".into()],
            ..Default::default()
        },
        ast: lookup_ast(),
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(NodeInfo {
        ast: lookup_ast(),
        ..make_node(StmtKind::Exit)
    });
    cfg.add_edge(entry, use_node, EdgeKind::Seq);
    cfg.add_edge(use_node, exit, EdgeKind::Seq);

    // `userId` is declared as a real formal of `lookup`.
    let formals = vec!["userId".to_string()];
    let body = lower_to_ssa_with_params(&cfg, entry, Some("lookup"), false, &formals)
        .expect("SSA lowering should succeed");

    // The Param instruction for `userId` is looked up in the first block.
    let entry_block = body.blocks.first();
    let user_id_param = entry_block
        .and_then(|block| {
            block.body.iter().find(|inst| {
                inst.var_name.as_deref() == Some("userId")
                    && matches!(inst.op, SsaOp::Param { .. })
            })
        })
        .expect("userId Param should be present");

    let marked_synthetic = body.synthetic_externals.contains(&user_id_param.value);
    assert!(
        !marked_synthetic,
        "real formal `userId` must not be marked synthetic; \
         synthetic_externals={:?}",
        body.synthetic_externals,
    );
}
|
||||
|
||||
/// W1: a plain non-dotted assignment (`x = 1`) records nothing
|
||||
/// in `field_writes`. Strict-additive: existing behaviour is
|
||||
/// unchanged for non-field-write shapes.
|
||||
|
|
|
|||
|
|
@ -249,6 +249,14 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
"OkHttpClient" | "WebClient" | "RestTemplate" => Some(TypeKind::HttpClient),
|
||||
"getConnection" => Some(TypeKind::DatabaseConnection),
|
||||
"MongoClient" => Some(TypeKind::DatabaseConnection),
|
||||
// JDBC `conn.createStatement()` / `conn.prepareCall()` produce a
|
||||
// `Statement` / `CallableStatement` whose `.execute(sql)` is a
|
||||
// first-class SQL sink. Mapped to `DatabaseConnection` so the
|
||||
// type-qualified label `DatabaseConnection.execute` (in
|
||||
// `labels/java.rs`) fires for `s.execute(query)` calls without
|
||||
// widening the bare `execute` matcher. Surfaced by
|
||||
// GHSA-h8cj-hpmg-636v (Appsmith FilterDataServiceCE.dropTable).
|
||||
"createStatement" | "prepareCall" => Some(TypeKind::DatabaseConnection),
|
||||
"FileInputStream" | "FileOutputStream" | "FileReader" | "FileWriter"
|
||||
| "BufferedReader" | "BufferedWriter" => Some(TypeKind::FileHandle),
|
||||
"getWriter" | "getOutputStream" => Some(TypeKind::HttpResponse),
|
||||
|
|
|
|||
|
|
@ -718,6 +718,52 @@ impl DefaultTransfer<'_> {
|
|||
if let Some(ref def) = info.taint.defines
|
||||
&& let Some(def_sym) = self.get_sym(info, def)
|
||||
{
|
||||
// SAFE-FOR-FIELD-LHS: when the LHS is a member expression
|
||||
// (struct field / object property), do NOT track the field as
|
||||
// a separate resource — the parent struct/object owns the
|
||||
// field's lifecycle and the local function body cannot
|
||||
// observe whether/when the parent's destructor (or paired
|
||||
// Stop()/dispose() method on the parent) releases the
|
||||
// underlying storage. Still mark the RHS as MOVED so the
|
||||
// local-leak analysis treats the assignment as ownership
|
||||
// transfer to the parent, not as a continuing local handle.
|
||||
//
|
||||
// Two real-repo shapes this closes (curl, openssl, postgres):
|
||||
//
|
||||
// (i) Sub-buffer alias inside a returned struct:
|
||||
// e = curlx_calloc(...);
|
||||
// e->name = (char *)e + sizeof(*e); // sub-buffer alias
|
||||
// return e;
|
||||
// Without this gate, e's OPEN transferred to e->name, e went
|
||||
// MOVED, and e->name surfaced as "never closed".
|
||||
//
|
||||
// (ii) Local-into-field ownership transfer:
|
||||
// ptr = malloc(...);
|
||||
// mem->buf = ptr; // ownership now lives in *mem
|
||||
// Without this gate, ptr was MOVED to mem->buf, but mem->buf
|
||||
// then leaked at exit because *mem's lifecycle is owned by
|
||||
// the caller. With this gate, ptr is MOVED (transfer
|
||||
// acknowledged) and mem->buf is not separately tracked.
|
||||
//
|
||||
// Multi-language: applies to all languages. This is distinct
|
||||
// from the `apply_call` field-LHS gate (Go-only because the
|
||||
// documented TS/JS class-field acquire
|
||||
// `this.fd = fs.openSync(...)` IS the expected leak pattern
|
||||
// in tests/fixtures/.../typescript/state/resource_class.ts —
|
||||
// that path remains untouched here because RHS-is-a-call
|
||||
// routes through `apply_call`, not `apply_assignment`).
|
||||
if def.contains('.') || def.contains("->") {
|
||||
for used in &info.taint.uses {
|
||||
if let Some(use_sym) = self.get_sym(info, used) {
|
||||
let lc = state.resource.get(use_sym);
|
||||
if lc.contains(ResourceLifecycle::OPEN) {
|
||||
state.resource.set(use_sym, ResourceLifecycle::MOVED);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
// If the RHS is a tracked resource, transfer its state
|
||||
for used in &info.taint.uses {
|
||||
if let Some(use_sym) = self.get_sym(info, used) {
|
||||
|
|
@ -1063,6 +1109,99 @@ mod tests {
|
|||
assert!(!is_guard_like("open_file"));
|
||||
}
|
||||
|
||||
/// SAFE-FOR-FIELD-LHS gate.
///
/// Applies the transfer function to an assignment whose `defines` is a
/// struct field (`e->name`) and whose `uses` names a local already in the
/// OPEN state (`e`). Expected outcome: no events, the RHS local ends up
/// MOVED (ownership handed to the parent struct), and the field symbol
/// carries no lifecycle bits, i.e. it is not tracked as a separate OPEN
/// resource. Pins the curl/dynhds.c::entry_new shape.
#[test]
fn field_lhs_assignment_moves_rhs_and_does_not_track_field() {
    let mut interner = SymbolInterner::new();
    let parent = interner.intern("e");
    let field = interner.intern("e->name");

    let transfer = DefaultTransfer {
        lang: Lang::C,
        resource_pairs: rules::resource_pairs(Lang::C),
        interner: &interner,
        resource_method_summaries: &[],
        ptr_proxy_hints: None,
    };

    // Pre-state: the local `e` holds an open resource.
    let mut before = ProductState::initial();
    before.resource.set(parent, ResourceLifecycle::OPEN);

    // `e->name = e` (sub-buffer alias): defines = "e->name", uses = ["e"].
    let assignment = NodeInfo {
        kind: StmtKind::Seq,
        ast: AstMeta {
            span: (0, 10),
            ..Default::default()
        },
        taint: TaintMeta {
            defines: Some("e->name".into()),
            uses: vec!["e".into()],
            ..Default::default()
        },
        ..Default::default()
    };

    let (after, events) = transfer.apply(NodeIndex::new(0), &assignment, None, before);

    assert!(events.is_empty());
    assert_eq!(
        after.resource.get(parent),
        ResourceLifecycle::MOVED,
        "RHS local should transfer to MOVED (ownership handed to parent struct)"
    );
    assert_eq!(
        after.resource.get(field),
        ResourceLifecycle::empty(),
        "field-LHS must NOT be seeded as a separately-tracked OPEN resource"
    );
}
|
||||
|
||||
/// Recall guard for the field-LHS gate.
///
/// A plain local-to-local alias (`cursor = buf`, no field on the LHS) must
/// keep the existing behaviour: the OPEN state moves to the new alias and
/// the source local becomes MOVED, so local-leak detection is preserved.
#[test]
fn local_to_local_assignment_still_transfers_open() {
    let mut interner = SymbolInterner::new();
    let source = interner.intern("buf");
    let alias = interner.intern("cursor");

    let transfer = DefaultTransfer {
        lang: Lang::C,
        resource_pairs: rules::resource_pairs(Lang::C),
        interner: &interner,
        resource_method_summaries: &[],
        ptr_proxy_hints: None,
    };

    // Pre-state: `buf` holds an open resource.
    let mut before = ProductState::initial();
    before.resource.set(source, ResourceLifecycle::OPEN);

    // `cursor = buf`: plain alias, no field.
    let assignment = NodeInfo {
        kind: StmtKind::Seq,
        ast: AstMeta {
            span: (0, 10),
            ..Default::default()
        },
        taint: TaintMeta {
            defines: Some("cursor".into()),
            uses: vec!["buf".into()],
            ..Default::default()
        },
        ..Default::default()
    };

    let (after, events) = transfer.apply(NodeIndex::new(0), &assignment, None, before);

    assert!(events.is_empty());
    assert_eq!(after.resource.get(source), ResourceLifecycle::MOVED);
    assert_eq!(after.resource.get(alias), ResourceLifecycle::OPEN);
}
|
||||
|
||||
#[test]
|
||||
fn is_simple_truth_check_recognises_bare_identifier() {
|
||||
let make = |text: &str, vars: Vec<&str>| NodeInfo {
|
||||
|
|
|
|||
|
|
@ -1480,6 +1480,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
|
|||
None,
|
||||
Some(&formal_params),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// Only store if the summary has observable effects. With
|
||||
|
|
@ -1610,6 +1611,11 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
} else {
|
||||
None
|
||||
};
|
||||
let param_types_ref = if !body.meta.param_types.is_empty() {
|
||||
Some(body.meta.param_types.as_slice())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let summary = ssa_transfer::extract_ssa_func_summary(
|
||||
&func_ssa,
|
||||
&body.graph,
|
||||
|
|
@ -1623,6 +1629,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
locator,
|
||||
Some(formal_params),
|
||||
formal_destructured,
|
||||
param_types_ref,
|
||||
);
|
||||
|
||||
// Always insert the summary, even when all fields are empty/default.
|
||||
|
|
@ -1860,6 +1867,11 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
} else {
|
||||
None
|
||||
};
|
||||
let param_types_ref = if !body.meta.param_types.is_empty() {
|
||||
Some(body.meta.param_types.as_slice())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let new_summary = ssa_transfer::extract_ssa_func_summary_full(
|
||||
&callee.ssa,
|
||||
parent_cfg,
|
||||
|
|
@ -1874,6 +1886,7 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
Some(&body.meta.params),
|
||||
Some(&augmented_snapshot),
|
||||
formal_destructured,
|
||||
param_types_ref,
|
||||
);
|
||||
|
||||
// OR-merge sink-only fields into the existing summary.
|
||||
|
|
|
|||
|
|
@ -308,6 +308,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
return PredicateKind::AllowlistCheck;
|
||||
}
|
||||
|
||||
// ── Java/Kotlin Pattern.matcher().matches() chain (before TypeCheck) ─
|
||||
//
|
||||
// Recognise `<re>.matcher(value).matches()` as a regex allowlist
|
||||
// validator, not a TypeCheck. The receiver of `.matcher(` must
|
||||
// contain `regex` or `pattern` so we don't widen to arbitrary
|
||||
// `obj.matcher(x).matches()` calls. Surfaced by GHSA-h8cj-hpmg-636v
|
||||
// (Appsmith FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()).
|
||||
// Matched here (before the generic `.matches(` TypeCheck branch
|
||||
// below) so the chain doesn't silently fall into TypeCheck.
|
||||
if let Some(matcher_pos) = lower.find(".matcher(")
|
||||
&& lower[matcher_pos..].contains(".matches(")
|
||||
{
|
||||
let receiver = &lower[..matcher_pos];
|
||||
if receiver.contains("regex") || receiver.contains("pattern") {
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Type-check guards ──────────────────────────────────────────────
|
||||
if lower.contains("typeof ")
|
||||
|| lower.contains("isinstance(")
|
||||
|
|
@ -395,6 +413,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
}
|
||||
}
|
||||
|
||||
// Java idiom `<PATTERN>.matcher(value).matches()` — the regex
|
||||
// allowlist on Java/Kotlin is a two-step chain (`Pattern.matcher`
|
||||
// returns a `Matcher`, `.matches()` is the boolean predicate).
|
||||
// The bare callee here is `matches` (no args), so the
|
||||
// single-call recogniser above doesn't fire. Lock on the
|
||||
// chain shape and require the receiver of `.matcher(` to carry
|
||||
// a regex / pattern marker so we don't widen to `.matcher(` on
|
||||
// arbitrary types. Surfaced by GHSA-h8cj-hpmg-636v
|
||||
// (Appsmith FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()).
|
||||
if bare == "matches"
|
||||
&& let Some(matcher_pos) = lower.find(".matcher(")
|
||||
{
|
||||
let receiver = &lower[..matcher_pos];
|
||||
if receiver.contains("regex") || receiver.contains("pattern") {
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
}
|
||||
|
||||
// Sanitizer
|
||||
if bare.contains("sanitiz") || bare.contains("escape") || bare.contains("encode") {
|
||||
return PredicateKind::SanitizerCall;
|
||||
|
|
@ -648,6 +684,25 @@ fn extract_validation_target(text: &str) -> Option<String> {
|
|||
let trimmed = trimmed.trim_start_matches(['(', '!', ' ', '\t']);
|
||||
let trimmed = trimmed.strip_prefix("not ").unwrap_or(trimmed).trim();
|
||||
|
||||
// Java/Kotlin chain `<re>.matcher(value).matches()`: the validated
|
||||
// target is the inner `.matcher()` argument, not the bare `.matches()`
|
||||
// receiver. Locked on the same regex/pattern receiver gate as the
|
||||
// classifier (GHSA-h8cj-hpmg-636v).
|
||||
if trimmed.to_ascii_lowercase().contains(".matches(")
|
||||
&& let Some(matcher_pos) = trimmed.find(".matcher(")
|
||||
{
|
||||
let receiver_lower = trimmed[..matcher_pos].to_ascii_lowercase();
|
||||
if receiver_lower.contains("regex") || receiver_lower.contains("pattern") {
|
||||
let args_start = matcher_pos + ".matcher(".len();
|
||||
if let Some(first_arg) = first_call_arg(&trimmed[args_start..]) {
|
||||
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
return Some(first_arg.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the first `(` which separates callee from args
|
||||
let paren_pos = trimmed.find('(')?;
|
||||
let callee_part = &trimmed[..paren_pos];
|
||||
|
|
@ -1559,3 +1614,43 @@ mod tests {
|
|||
assert!(is_bounded_length_check("x.len() <= 256"));
|
||||
}
|
||||
}
|
||||
|
||||
/// Regression tests for the Java/Kotlin `<re>.matcher(value).matches()`
/// chain (GHSA-h8cj-hpmg-636v): the chain must classify as a validation
/// call whose target is the `.matcher()` argument, and only when the
/// receiver name carries a regex/pattern marker.
#[cfg(test)]
mod ghsa_h8cj_hpmg_636v_tests {
    use super::*;

    /// Un-negated chain shared by the positive tests.
    const PATTERN_CHAIN: &str = "FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()";

    #[test]
    fn java_pattern_matcher_chain_classifies_as_validation() {
        assert_eq!(
            classify_condition(PATTERN_CHAIN),
            PredicateKind::ValidationCall,
            "matcher().matches() chain on PATTERN-named receiver should be ValidationCall"
        );
    }

    #[test]
    fn java_pattern_matcher_chain_target_is_matcher_arg() {
        let (kind, target) = classify_condition_with_target(PATTERN_CHAIN);
        assert_eq!(kind, PredicateKind::ValidationCall);
        assert_eq!(target.as_deref(), Some("tableName"));
    }

    #[test]
    fn java_negated_pattern_matcher_chain_target_is_matcher_arg() {
        // A leading `!` must not change the classification or the target.
        let (kind, target) = classify_condition_with_target(
            "!FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()",
        );
        assert_eq!(kind, PredicateKind::ValidationCall);
        assert_eq!(target.as_deref(), Some("tableName"));
    }

    #[test]
    fn java_pattern_matcher_chain_non_pattern_receiver_is_not_validation() {
        // Precision guard: only fires when receiver name has regex/pattern marker.
        let kind = classify_condition("obj.matcher(x).matches()");
        let is_validation = kind == PredicateKind::ValidationCall;
        assert!(
            !is_validation,
            "no regex marker should not trigger validation"
        );
    }
}
|
||||
|
|
|
|||
|
|
@ -8077,13 +8077,17 @@ fn is_abstract_safe_for_sink(
|
|||
return true;
|
||||
}
|
||||
|
||||
// HTML_ESCAPE type-only gate: an integer's decimal representation is
|
||||
// always digits (with optional leading `-`), which never contain HTML
|
||||
// metacharacters (`<`, `>`, `"`, `'`, `&`, `/`, `:`) in either text or
|
||||
// attribute context. The interval bound is irrelevant here, a large
|
||||
// magnitude doesn't introduce metachars, so HTML_ESCAPE uses a
|
||||
// type-only leaf check rather than the SQL/FILE/SHELL dual gate below.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE) {
|
||||
// HTML_ESCAPE / FILE_IO type-only gate: an integer's decimal
|
||||
// representation is always digits (with optional leading `-`), which
|
||||
// never contain HTML metacharacters (`<`, `>`, `"`, `'`, `&`, `/`,
|
||||
// `:`) nor path metacharacters (`/`, `\`, `.`). Magnitude is
|
||||
// irrelevant — a large value doesn't introduce metachars, so both
|
||||
// sink classes use a type-only leaf check rather than the SQL/SHELL
|
||||
// dual gate below. Closes the sudo-rs RUSTSEC-2023-0069 patched FP
|
||||
// where `let uid: u32 = user.parse()?; path.push(uid.to_string())`
|
||||
// was flagged as a path-traversal FILE_IO sink despite the SSA
|
||||
// value being unambiguously typed as a numeric uid.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
|
||||
|
|
@ -8092,14 +8096,15 @@ fn is_abstract_safe_for_sink(
|
|||
}
|
||||
}
|
||||
|
||||
// Dual gate: SQL_QUERY / FILE_IO / SHELL_ESCAPE with proven Int type AND
|
||||
// bounded interval. Both conditions required: type proves the value IS
|
||||
// an integer (not a string that happened to parse), interval proves it's
|
||||
// Dual gate: SQL_QUERY / SHELL_ESCAPE with proven Int type AND bounded
|
||||
// interval. Both conditions required: type proves the value IS an
|
||||
// integer (not a string that happened to parse), interval proves it's
|
||||
// bounded (not arbitrary). Traces through Assign chains so
|
||||
// "const_string + tainted_int" is caught. SHELL_ESCAPE is included
|
||||
// because a bounded integer's decimal representation can't contain shell
|
||||
// metacharacters.
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
|
||||
// "const_string + tainted_int" is caught. SQL_QUERY keeps the bound
|
||||
// requirement because RUSTSEC-2024-0363-style binary-protocol overflow
|
||||
// requires a 4 GiB+ payload; SHELL_ESCAPE keeps it because a
|
||||
// multi-line decimal can still trip newline-sensitive shell parsing.
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty()
|
||||
|
|
@ -8212,10 +8217,13 @@ fn is_call_abstract_safe(
|
|||
}
|
||||
}
|
||||
|
||||
// HTML_ESCAPE type-only gate (same as non-Call path): digits never
|
||||
// contain HTML metacharacters regardless of magnitude, so an integer
|
||||
// payload is safe for an HTML sink without requiring a bounded interval.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE) {
|
||||
// HTML_ESCAPE / FILE_IO type-only gate (same as non-Call path): digits
|
||||
// never contain HTML metacharacters or path-traversal metacharacters
|
||||
// regardless of magnitude, so an integer payload is safe for these
|
||||
// sink classes without requiring a bounded interval. Closes the
|
||||
// RUSTSEC-2023-0069 patched FP for cross-function summary-resolved
|
||||
// path sinks like `open_for_user(uid)`.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
|
||||
|
|
@ -8224,8 +8232,10 @@ fn is_call_abstract_safe(
|
|||
}
|
||||
}
|
||||
|
||||
// Dual gate for Call sinks (same as non-Call path)
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
|
||||
// Dual gate for Call sinks: SQL_QUERY / SHELL_ESCAPE keep the bounded-
|
||||
// interval requirement (see is_abstract_safe_for_sink for the
|
||||
// rationale).
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty()
|
||||
|
|
@ -8368,6 +8378,15 @@ fn trace_single_leaf(
|
|||
leaves.push(v);
|
||||
}
|
||||
}
|
||||
SsaOp::Call { callee, .. } if crate::ssa::type_facts::is_int_producing_callee(callee) => {
|
||||
// Int-producing conversion (`str.parse::<u32>()`, `Atoi`,
|
||||
// `parseInt`, ...). Tracing past the Call would land on the
|
||||
// String-typed source and defeat the type-only HTML/FILE_IO
|
||||
// suppression below — but the Call's *result* is unambiguously
|
||||
// numeric, so the value itself is the right leaf. Mirrors the
|
||||
// is_numeric_length_access stop-leaf at the top of this fn.
|
||||
leaves.push(v);
|
||||
}
|
||||
SsaOp::Call { args, .. } => {
|
||||
// For a Call whose node is not itself a Source (so the Call
|
||||
// introduces no fresh attacker-controlled taint), trace through
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ use super::{
|
|||
use crate::cfg::{BodyId, Cfg, FuncSummaries};
|
||||
use crate::labels::{Cap, SourceKind};
|
||||
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue, Terminator};
|
||||
use crate::ssa::type_facts::{TypeFactResult, TypeKind, analyze_types_with_param_types};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use crate::taint::domain::{TaintOrigin, VarTaint};
|
||||
|
|
@ -51,6 +52,7 @@ pub fn extract_ssa_func_summary(
|
|||
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
||||
formal_param_names: Option<&[String]>,
|
||||
formal_destructured_fields: Option<&[Vec<String>]>,
|
||||
param_types: Option<&[Option<TypeKind>]>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
extract_ssa_func_summary_full(
|
||||
ssa,
|
||||
|
|
@ -66,6 +68,7 @@ pub fn extract_ssa_func_summary(
|
|||
formal_param_names,
|
||||
None,
|
||||
formal_destructured_fields,
|
||||
param_types,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +107,34 @@ pub fn extract_ssa_func_summary_full(
|
|||
// taint flow through sibling bindings is visible to summary
|
||||
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
|
||||
formal_destructured_fields: Option<&[Vec<String>]>,
|
||||
// BodyMeta.param_types parallel-vec. When supplied, drives a local
|
||||
// `analyze_types_with_param_types` pass so the per-parameter probe's
|
||||
// `SsaTaintTransfer.type_facts` is populated. Without this, helper
|
||||
// bodies whose sinks are recognised only via type-qualified callee
|
||||
// resolution (`receiver_type.label_prefix() + "." + method`, e.g.
|
||||
// `DatabaseConnection.execute` for JDBC `Statement.execute`) silently
|
||||
// drop the sink during summary extraction even though the same
|
||||
// callee is correctly classified by the post-optimise transfer in
|
||||
// `transfer_inst`. Surfaced by GHSA-h8cj-hpmg-636v (Appsmith
|
||||
// FilterDataServiceCE.dropTable: helper `executeDbQuery(query)`
|
||||
// routes the SQL string through `statement.execute(query)` whose
|
||||
// SQL_QUERY caps were invisible to the param-1 probe). `None` for
|
||||
// legacy / test paths preserves prior behaviour.
|
||||
param_types: Option<&[Option<TypeKind>]>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
// Pre-compute type facts on the un-optimised SSA body so the per-param
|
||||
// probe can resolve sinks that depend on receiver-type inference.
|
||||
// Empty const_values: this runs *before* the optimiser, so const-prop
|
||||
// refinements aren't available yet, but the pass-1 instruction-shape
|
||||
// typing (Source/Param/Call→constructor_type) and the second-pass
|
||||
// Assign/Phi propagation are sufficient for the JDBC chain
|
||||
// `Statement s = conn.createStatement(); s.execute(q);` to type `s`
|
||||
// as `DatabaseConnection`.
|
||||
let local_type_facts: Option<TypeFactResult> = param_types.map(|pt| {
|
||||
let empty_consts: HashMap<SsaValue, crate::ssa::const_prop::ConstLattice> = HashMap::new();
|
||||
analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt)
|
||||
});
|
||||
let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref();
|
||||
use crate::summary::SinkSite;
|
||||
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
|
||||
|
||||
|
|
@ -215,7 +245,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_seed: None,
|
||||
receiver_seed: None,
|
||||
const_values: None,
|
||||
type_facts: None,
|
||||
type_facts: local_type_facts_ref,
|
||||
ssa_summaries,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
|
|
@ -761,7 +791,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_seed: None,
|
||||
receiver_seed: None,
|
||||
const_values: None,
|
||||
type_facts: None,
|
||||
type_facts: local_type_facts_ref,
|
||||
ssa_summaries,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
|
|
|
|||
|
|
@ -4332,6 +4332,7 @@ fn ssa_summary_identity_propagation() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.param_to_return.is_empty(),
|
||||
|
|
@ -4396,6 +4397,7 @@ fn ssa_summary_sanitizer_strips_bits() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
// Sanitizer should strip some bits
|
||||
for (_, transform) in &summary.param_to_return {
|
||||
|
|
@ -4453,6 +4455,7 @@ fn ssa_summary_source_adds_bits() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.source_caps.is_empty(),
|
||||
|
|
@ -4510,6 +4513,7 @@ fn ssa_summary_param_to_sink() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.param_to_sink.is_empty(),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue