mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Performance and precision pass (#64)
This commit is contained in:
parent
c7c5e0f3a1
commit
fb698d2c27
97 changed files with 9932 additions and 517 deletions
437
src/ast.rs
437
src/ast.rs
|
|
@ -40,7 +40,7 @@ use crate::utils::ext::lowercase_ext;
|
|||
use crate::utils::{Config, query_cache};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use std::borrow::Cow;
|
||||
use std::cell::RefCell;
|
||||
use std::cell::{OnceCell, RefCell};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::ops::ControlFlow;
|
||||
use std::path::Path;
|
||||
|
|
@ -972,6 +972,27 @@ impl<'a> ParsedSource<'a> {
|
|||
{
|
||||
continue;
|
||||
}
|
||||
// Layer C2: PHP `Serializable::unserialize($input)` magic
|
||||
// method body — `public function unserialize($x) { ...
|
||||
// unserialize($x) ... }`. This is the legacy
|
||||
// `Serializable` interface contract (deprecated since PHP
|
||||
// 8.1). PHP itself invokes the method when restoring an
|
||||
// instance, so the body's `\unserialize($x)` call cannot
|
||||
// be removed without breaking the interface. The
|
||||
// actionable signal is at the class level (the class
|
||||
// implements Serializable — fix is to migrate to
|
||||
// `__serialize` / `__unserialize`), not at this call
|
||||
// site. Genuine deserialization sinks (free-function
|
||||
// `unserialize($_GET[..])`, helpers reading from session
|
||||
// / cache, etc.) keep firing because they are not inside
|
||||
// a method declaration named `unserialize` with a single
|
||||
// formal parameter passed straight to the call.
|
||||
if cq.meta.id == "php.deser.unserialize"
|
||||
&& self.lang_slug == "php"
|
||||
&& is_php_unserialize_magic_method_passthrough(cap.node, self.bytes)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Layer D: C/C++ buffer-overflow pattern rules
|
||||
// (`{c,cpp}.memory.strcpy`, `strcat`, `sprintf`) fire
|
||||
// syntactically on every call regardless of argument
|
||||
|
|
@ -1102,6 +1123,13 @@ struct ParsedFile<'a> {
|
|||
file_cfg: FileCfg,
|
||||
lang_rules: LangAnalysisRules,
|
||||
has_lang_rules: bool,
|
||||
/// Per-body SSA + const-prop + type-fact cache, lazily populated on first
|
||||
/// request and indexed by `BodyId.0`. Was being recomputed 2-3× per body
|
||||
/// across `run_cfg_analyses_with_lowered` (cfg analyses + state analyses)
|
||||
/// and `run_auth_analyses` (`collect_file_var_types`); on the gin profile
|
||||
/// `build_body_const_facts` accounted for 13.6% of wall-clock and a
|
||||
/// single-pass cache collapses that to ~4.5%.
|
||||
body_const_facts_cache: OnceCell<Vec<Option<cfg_analysis::BodyConstFacts>>>,
|
||||
}
|
||||
|
||||
impl<'a> ParsedFile<'a> {
|
||||
|
|
@ -1153,9 +1181,33 @@ impl<'a> ParsedFile<'a> {
|
|||
file_cfg,
|
||||
lang_rules,
|
||||
has_lang_rules,
|
||||
body_const_facts_cache: OnceCell::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-body const-fact cache, computed once on first request and shared
|
||||
/// across every per-body iteration in this file's analysis. Indexed by
|
||||
/// `BodyId.0` so callers can look up by body identity.
|
||||
fn body_const_facts_all(&self) -> &[Option<cfg_analysis::BodyConstFacts>] {
|
||||
self.body_const_facts_cache.get_or_init(|| {
|
||||
let lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
|
||||
self.file_cfg
|
||||
.bodies
|
||||
.iter()
|
||||
.map(|b| cfg_analysis::build_body_const_facts(b, lang))
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
|
||||
/// Look up the cached const facts for a specific body.
|
||||
fn body_const_facts(
|
||||
&self,
|
||||
body: &crate::cfg::BodyCfg,
|
||||
) -> Option<&cfg_analysis::BodyConstFacts> {
|
||||
let all = self.body_const_facts_all();
|
||||
all.get(body.meta.id.0 as usize).and_then(|f| f.as_ref())
|
||||
}
|
||||
|
||||
/// The top-level body's CFG graph (for backward-compatible access).
|
||||
fn cfg_graph(&self) -> &Cfg {
|
||||
&self.file_cfg.toplevel().graph
|
||||
|
|
@ -1468,7 +1520,7 @@ impl<'a> ParsedFile<'a> {
|
|||
.filter(|f| f.body_id == body.meta.id)
|
||||
.cloned()
|
||||
.collect();
|
||||
let body_const_facts = cfg_analysis::build_body_const_facts(body, caller_lang);
|
||||
let body_const_facts = self.body_const_facts(body);
|
||||
let cfg_ctx = cfg_analysis::AnalysisContext {
|
||||
cfg: &body.graph,
|
||||
entry: body.entry,
|
||||
|
|
@ -1481,8 +1533,8 @@ impl<'a> ParsedFile<'a> {
|
|||
taint_findings: &body_taint,
|
||||
analysis_rules: self.rules_ref(),
|
||||
taint_active,
|
||||
body_const_facts: body_const_facts.as_ref(),
|
||||
type_facts: body_const_facts.as_ref().map(|f| &f.type_facts),
|
||||
body_const_facts,
|
||||
type_facts: body_const_facts.map(|f| &f.type_facts),
|
||||
auth_decorators: &body.meta.auth_decorators,
|
||||
closure_released_var_names: Some(
|
||||
closure_released_per_body
|
||||
|
|
@ -1546,13 +1598,11 @@ impl<'a> ParsedFile<'a> {
|
|||
// points-to facts so the proxy-acquire transfer can
|
||||
// suppress SymbolId attribution on field-aliased
|
||||
// receivers (e.g. `m := c.mu; m.Lock()`).
|
||||
let body_pointer_hints = cfg_analysis::build_body_const_facts(body, caller_lang)
|
||||
.as_ref()
|
||||
.and_then(|f| {
|
||||
f.pointer_facts
|
||||
.as_ref()
|
||||
.map(|pf| pf.name_proxy_hints(&f.ssa))
|
||||
});
|
||||
let body_pointer_hints = self.body_const_facts(body).and_then(|f| {
|
||||
f.pointer_facts
|
||||
.as_ref()
|
||||
.map(|pf| pf.name_proxy_hints(&f.ssa))
|
||||
});
|
||||
let state_findings = state::run_state_analysis(
|
||||
&body.graph,
|
||||
body.entry,
|
||||
|
|
@ -1666,12 +1716,11 @@ impl<'a> ParsedFile<'a> {
|
|||
/// syntactic heuristics. Returns `None` when no body produces a
|
||||
/// typed variable.
|
||||
fn collect_file_var_types(&self) -> Option<auth_analysis::VarTypes> {
|
||||
let caller_lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
|
||||
let mut merged: std::collections::HashMap<String, crate::ssa::type_facts::TypeKind> =
|
||||
std::collections::HashMap::new();
|
||||
let mut dropped: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
for body in &self.file_cfg.bodies {
|
||||
let Some(facts) = cfg_analysis::build_body_const_facts(body, caller_lang) else {
|
||||
let Some(facts) = self.body_const_facts(body) else {
|
||||
continue;
|
||||
};
|
||||
for (idx, def) in facts.ssa.value_defs.iter().enumerate() {
|
||||
|
|
@ -1792,6 +1841,7 @@ pub fn extract_auth_model_for_debug(
|
|||
source.bytes,
|
||||
source.path,
|
||||
&rules,
|
||||
None,
|
||||
);
|
||||
Ok(Some(model))
|
||||
}
|
||||
|
|
@ -2401,6 +2451,165 @@ fn is_php_unserialize_allowed_classes_restricted(
|
|||
false
|
||||
}
|
||||
|
||||
/// PHP-only: returns `true` when the captured `function_call_expression`
|
||||
/// is the canonical `Serializable::unserialize($input)` magic-method
|
||||
/// pass-through — i.e. the call is inside a `method_declaration` named
|
||||
/// exactly `unserialize` (PHP method names are case-insensitive) with
|
||||
/// one formal parameter, and the call's single argument is the bare
|
||||
/// parameter variable.
|
||||
///
|
||||
/// **Why this is a non-actionable site for `php.deser.unserialize`:**
|
||||
/// `Serializable::unserialize($input)` is an interface contract method
|
||||
/// that PHP itself invokes when restoring an instance via the runtime
|
||||
/// `\unserialize($bytes)` machinery. The implementation MUST decode
|
||||
/// `$input` (the body's `\unserialize(...)` call) — there is no
|
||||
/// "safer" rewrite that preserves the contract. The actionable signal
|
||||
/// is at the class level (the class implements the deprecated
|
||||
/// `Serializable` interface — fix is to migrate to `__serialize` /
|
||||
/// `__unserialize`), not at this call site.
|
||||
///
|
||||
/// Conservative recognition:
|
||||
/// - method must be a `method_declaration` (NOT a free `function_definition` —
|
||||
/// the magic semantics only apply to instance methods)
|
||||
/// - method name == `unserialize` (case-insensitive)
|
||||
/// - exactly 1 formal parameter
|
||||
/// - call has exactly 1 argument
|
||||
/// - argument's inner expression is a `variable_name` whose name equals the
|
||||
/// formal parameter's name
|
||||
///
|
||||
/// Genuine deserialization sinks (free `unserialize($_GET[...])`, helpers
|
||||
/// reading from session/cache and passing through, etc.) keep firing
|
||||
/// because they are not inside a method declaration named `unserialize`.
|
||||
fn is_php_unserialize_magic_method_passthrough(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
|
||||
// The pattern captures `@n` (the function name); locate the enclosing
|
||||
// function_call_expression.
|
||||
let call_node = if cap_node.kind() == "function_call_expression" {
|
||||
cap_node
|
||||
} else {
|
||||
let mut cur = cap_node;
|
||||
let mut found = None;
|
||||
for _ in 0..4 {
|
||||
if cur.kind() == "function_call_expression" {
|
||||
found = Some(cur);
|
||||
break;
|
||||
}
|
||||
match cur.parent() {
|
||||
Some(p) => cur = p,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
match found {
|
||||
Some(c) => c,
|
||||
None => return false,
|
||||
}
|
||||
};
|
||||
|
||||
// Walk up to the nearest method_declaration. Stop at any other
|
||||
// function-introducing scope (free function, closure, arrow) — those
|
||||
// are not the Serializable contract.
|
||||
let mut cur = call_node;
|
||||
let method = loop {
|
||||
let Some(parent) = cur.parent() else {
|
||||
return false;
|
||||
};
|
||||
match parent.kind() {
|
||||
"method_declaration" => break parent,
|
||||
"function_definition"
|
||||
| "anonymous_function"
|
||||
| "anonymous_function_creation_expression"
|
||||
| "arrow_function"
|
||||
| "program" => return false,
|
||||
_ => {}
|
||||
}
|
||||
cur = parent;
|
||||
};
|
||||
|
||||
// Method name must be exactly `unserialize` (case-insensitive).
|
||||
let Some(name_node) = method
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| find_named_child_of_kind(method, "name"))
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let Ok(method_name) = std::str::from_utf8(&bytes[name_node.byte_range()]) else {
|
||||
return false;
|
||||
};
|
||||
if !method_name.eq_ignore_ascii_case("unserialize") {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Method must have exactly 1 formal parameter; capture its bare name.
|
||||
let Some(params) = method
|
||||
.child_by_field_name("parameters")
|
||||
.or_else(|| find_named_child_of_kind(method, "formal_parameters"))
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let mut formal_params: Vec<tree_sitter::Node> = Vec::new();
|
||||
for i in 0..params.named_child_count() as u32 {
|
||||
if let Some(p) = params.named_child(i)
|
||||
&& matches!(
|
||||
p.kind(),
|
||||
"simple_parameter"
|
||||
| "variadic_parameter"
|
||||
| "property_promotion_parameter"
|
||||
| "promoted_constructor_parameter"
|
||||
)
|
||||
{
|
||||
formal_params.push(p);
|
||||
}
|
||||
}
|
||||
if formal_params.len() != 1 {
|
||||
return false;
|
||||
}
|
||||
let param = formal_params[0];
|
||||
let var_node = param
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| find_named_child_of_kind(param, "variable_name"));
|
||||
let Some(var_node) = var_node else {
|
||||
return false;
|
||||
};
|
||||
let inner_name_node = if var_node.kind() == "variable_name" {
|
||||
var_node.named_child(0)
|
||||
} else {
|
||||
Some(var_node)
|
||||
};
|
||||
let Some(inner_name_node) = inner_name_node else {
|
||||
return false;
|
||||
};
|
||||
let Ok(param_name) = std::str::from_utf8(&bytes[inner_name_node.byte_range()]) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
// Call must have exactly 1 argument that is the bare parameter variable.
|
||||
let Some(arg_list) = find_named_child_of_kind(call_node, "arguments") else {
|
||||
return false;
|
||||
};
|
||||
let mut args: Vec<tree_sitter::Node> = Vec::new();
|
||||
for i in 0..arg_list.named_child_count() as u32 {
|
||||
if let Some(c) = arg_list.named_child(i)
|
||||
&& c.kind() == "argument"
|
||||
{
|
||||
args.push(c);
|
||||
}
|
||||
}
|
||||
if args.len() != 1 {
|
||||
return false;
|
||||
}
|
||||
let inner = args[0].named_child(0);
|
||||
let Some(inner) = inner else { return false };
|
||||
if inner.kind() != "variable_name" {
|
||||
return false;
|
||||
}
|
||||
let Some(arg_name_node) = inner.named_child(0) else {
|
||||
return false;
|
||||
};
|
||||
let Ok(arg_name) = std::str::from_utf8(&bytes[arg_name_node.byte_range()]) else {
|
||||
return false;
|
||||
};
|
||||
arg_name == param_name
|
||||
}
|
||||
|
||||
/// C/C++-only Layer D: structural suppression of buffer-overflow pattern
|
||||
/// rules when the source / format-string argument is a literal whose
|
||||
/// contributed length is statically bounded.
|
||||
|
|
@ -3999,6 +4208,15 @@ pub struct FusedResult {
|
|||
crate::symbol::FuncKey,
|
||||
auth_analysis::model::AuthCheckSummary,
|
||||
)>,
|
||||
/// Per-Python-file router-level dep declarations + `include_router`
|
||||
/// edges for cross-file FastAPI router-dep propagation. `None` for
|
||||
/// non-Python files; `Some((module_id, facts))` for Python files
|
||||
/// where `module_id` is the file's
|
||||
/// [`auth_analysis::router_facts::module_id_for_storage`] key.
|
||||
/// Pass 1 collects these into
|
||||
/// `GlobalSummaries.router_facts_by_module`; pass 2 resolves them
|
||||
/// per-file via `GlobalSummaries::resolve_cross_file_router_deps`.
|
||||
pub router_facts: Option<(String, auth_analysis::router_facts::PerFileRouterFacts)>,
|
||||
}
|
||||
|
||||
/// Parse the file once, build the CFG once, and produce both function
|
||||
|
|
@ -4034,6 +4252,7 @@ pub fn analyse_file_fused(
|
|||
cfg_nodes: 0,
|
||||
ssa_bodies: vec![],
|
||||
auth_summaries: vec![],
|
||||
router_facts: None,
|
||||
});
|
||||
};
|
||||
|
||||
|
|
@ -4081,6 +4300,28 @@ pub fn analyse_file_fused(
|
|||
(vec![], vec![])
|
||||
};
|
||||
|
||||
let mut auth_summaries: Vec<(
|
||||
crate::symbol::FuncKey,
|
||||
auth_analysis::model::AuthCheckSummary,
|
||||
)> = Vec::new();
|
||||
|
||||
// Per-file router-dep facts for cross-file FastAPI propagation.
|
||||
// Extracted unconditionally for Python files so pass 1 can persist
|
||||
// them into `GlobalSummaries.router_facts_by_module` even on Cfg /
|
||||
// Taint modes (the auth analysis itself runs only under Full, but
|
||||
// the index has to be populated by the time pass 2 launches).
|
||||
let router_facts_for_this_file = if parsed.source.lang_slug == "python" {
|
||||
auth_analysis::router_facts::module_id_for_storage(parsed.source.path).map(|module_id| {
|
||||
let facts = auth_analysis::router_facts::extract_router_facts_for_python(
|
||||
&parsed.source.tree,
|
||||
parsed.source.bytes,
|
||||
);
|
||||
(module_id, facts)
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
|
||||
let ast_findings = parsed.source.run_ast_queries(cfg);
|
||||
// Layer B only applies when taint had the opportunity to evaluate
|
||||
|
|
@ -4095,23 +4336,70 @@ pub fn analyse_file_fused(
|
|||
} else {
|
||||
out.extend(ast_findings);
|
||||
}
|
||||
out.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
|
||||
// Build the AuthorizationModel exactly once per file when Full
|
||||
// mode needs both diagnostics AND per-file summaries; pre-fix
|
||||
// the diag path and the summary path each ran their own
|
||||
// `extract::extract_authorization_model`, duplicating
|
||||
// `collect_top_level_units` + every framework extractor's AST
|
||||
// walk. See `auth_analysis::run_auth_analysis_with_model` for
|
||||
// measured savings.
|
||||
let auth_rules = auth_analysis::config::build_auth_rules(cfg, parsed.source.lang_slug);
|
||||
if auth_rules.enabled {
|
||||
// Resolve cross-file router-deps for the current file (Python only).
|
||||
// The resolved map lives on `AuthorizationModel.cross_file_router_deps`
|
||||
// BEFORE `FlaskExtractor::extract` runs, so the in-extractor merge
|
||||
// sees both inline router-deps and the cross-file lift in one pass.
|
||||
let cross_file_router_deps = if parsed.source.lang_slug == "python"
|
||||
&& let Some(gs) = global_summaries
|
||||
&& let Some(child_module_id) =
|
||||
auth_analysis::router_facts::module_id_for_path(parsed.source.path)
|
||||
{
|
||||
let resolved = gs.resolve_cross_file_router_deps(&child_module_id);
|
||||
if resolved.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(resolved)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let auth_model = auth_analysis::extract::extract_authorization_model(
|
||||
parsed.source.lang_slug,
|
||||
cfg.framework_ctx.as_ref(),
|
||||
&parsed.source.tree,
|
||||
parsed.source.bytes,
|
||||
parsed.source.path,
|
||||
&auth_rules,
|
||||
cross_file_router_deps.as_ref(),
|
||||
);
|
||||
// Extract summaries from the **base** model (pre var-types,
|
||||
// pre-helper-lifting) so the persisted per-file summary
|
||||
// carries only the helper's own intrinsic auth checks,
|
||||
// matching the legacy `extract_auth_summaries_by_key` path
|
||||
// bit-for-bit.
|
||||
if cfg.scanner.mode == AnalysisMode::Full {
|
||||
auth_summaries = auth_analysis::extract_auth_summaries_from_model(
|
||||
&auth_model,
|
||||
parsed.source.lang_slug,
|
||||
parsed.source.path,
|
||||
scan_root,
|
||||
);
|
||||
}
|
||||
let var_types = parsed.collect_file_var_types();
|
||||
out.extend(auth_analysis::run_auth_analysis_with_model(
|
||||
auth_model,
|
||||
&parsed.source.tree,
|
||||
parsed.source.lang_slug,
|
||||
parsed.source.path,
|
||||
&auth_rules,
|
||||
var_types.as_ref(),
|
||||
global_summaries,
|
||||
scan_root,
|
||||
));
|
||||
}
|
||||
}
|
||||
parsed.source.finalize_diags(&mut out, cfg);
|
||||
|
||||
let auth_summaries = if cfg.scanner.mode == AnalysisMode::Full {
|
||||
auth_analysis::extract_auth_summaries_by_key(
|
||||
&parsed.source.tree,
|
||||
parsed.source.bytes,
|
||||
parsed.source.lang_slug,
|
||||
parsed.source.path,
|
||||
cfg,
|
||||
scan_root,
|
||||
)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Ok(FusedResult {
|
||||
summaries,
|
||||
diags: out,
|
||||
|
|
@ -4119,6 +4407,7 @@ pub fn analyse_file_fused(
|
|||
cfg_nodes,
|
||||
ssa_bodies,
|
||||
auth_summaries,
|
||||
router_facts: router_facts_for_this_file,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -4441,6 +4730,100 @@ fn php_unserialize_allowed_classes_recognises_safe_forms() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Table-driven check of `is_php_unserialize_magic_method_passthrough`: each
/// row is a PHP snippet, the verdict expected for its first `unserialize`
/// call, and the message reported when the verdict differs.
#[test]
fn php_unserialize_magic_method_passthrough_recognises_serializable_contract() {
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&lang).unwrap();
    let q = r#"(function_call_expression function: (name) @n (#eq? @n "unserialize")) @vuln"#;

    let cases: [(&[u8], bool, &str); 9] = [
        // Canonical Serializable::unserialize delegating to __unserialize.
        (
            b"<?php\nclass R {\n public function unserialize($serialized): void {\n $this->__unserialize(unserialize($serialized));\n }\n}\n",
            true,
            "Serializable::unserialize($x) → unserialize($x) should be suppressed",
        ),
        // Multi-target list-destructuring assignment shape (Joomla Cli/Input).
        (
            b"<?php\nclass C {\n public function unserialize($input) {\n [$this->a, $this->b] = unserialize($input);\n }\n}\n",
            true,
            "list-destructuring inside Serializable::unserialize should be suppressed",
        ),
        // Method names are case-insensitive in PHP.
        (
            b"<?php\nclass C { public function UnSerialize($d) { return unserialize($d); } }\n",
            true,
            "method name should match case-insensitively (PHP)",
        ),
        // A free function is not a magic method and must keep firing.
        (
            b"<?php\nfunction load($d) { return unserialize($d); }\n",
            false,
            "free function should NOT be suppressed",
        ),
        // A differently named method is not the Serializable contract.
        (
            b"<?php\nclass C { public function decode($d) { return unserialize($d); } }\n",
            false,
            "method named `decode` should NOT be suppressed",
        ),
        // Named `unserialize` but with two parameters: not the magic signature.
        (
            b"<?php\nclass C { public function unserialize($d, $opts) { return unserialize($d, $opts); } }\n",
            false,
            "two-param method named unserialize should NOT be suppressed",
        ),
        // Magic signature, but the call decodes some other source than the
        // formal parameter.
        (
            b"<?php\nclass C { public function unserialize($input) { return unserialize($_GET['x']); } }\n",
            false,
            "non-pass-through arg inside magic method should NOT be suppressed",
        ),
        // A wrapped argument is not a bare-parameter pass-through; this shape
        // covers cache/session pass-throughs the rule should still surface.
        (
            b"<?php\nclass C { public function unserialize($input) { return unserialize(trim($input)); } }\n",
            false,
            "wrapped argument inside magic method should NOT be suppressed (conservative)",
        ),
        // Defensive: a closure is not a method_declaration.
        (
            b"<?php\n$f = function($input) { return unserialize($input); };\n",
            false,
            "closure should NOT be suppressed",
        ),
    ];

    for (code, expected, why) in cases {
        let tree = parser.parse(code, None).unwrap();
        let cap = first_php_capture(&tree, code, q);
        assert!(
            is_php_unserialize_magic_method_passthrough(cap, code) == expected,
            "{why}"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn php_weak_hash_non_crypto_use_recognises_canonical_shapes() {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
|
|
|
|||
|
|
@ -257,7 +257,18 @@ fn check_token_override_without_validation(
|
|||
continue;
|
||||
};
|
||||
let Some(final_write) = unit.operations.iter().rev().find(|operation| {
|
||||
operation.kind == OperationKind::Mutation && operation.line >= token_lookup.line
|
||||
operation.kind == OperationKind::Mutation
|
||||
&& operation.line >= token_lookup.line
|
||||
// Ignore `InMemoryLocal` mutations (HashSet/HashMap/Vec
|
||||
// local bookkeeping like `verified_ids.update(myteams)`,
|
||||
// `requested_teams.update(verified_ids)`). The verb is
|
||||
// `update` so `OperationKind::Mutation` is set, but the
|
||||
// sink_class encodes that the receiver is a non-sink
|
||||
// local container — never a token-bound write. Mirrors
|
||||
// the gate in `check_ownership_gaps`.
|
||||
&& operation
|
||||
.sink_class
|
||||
.is_none_or(|class| class.is_auth_relevant())
|
||||
}) else {
|
||||
continue;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -55,6 +55,13 @@ pub struct AuthAnalysisRules {
|
|||
/// `WHERE <ACL>.user_id = ?N`, make every returned row
|
||||
/// membership-gated. See `sql_semantics::classify_sql_query`.
|
||||
pub acl_tables: Vec<String>,
|
||||
/// Callee names that, when they appear as the chain root of a
|
||||
/// chained-call shape (`select(X).filter_by(...)`,
|
||||
/// `query(X).filter(...)`), anchor the trailing method as a DB
|
||||
/// query-builder operation. Overrides the chained-call suppression
|
||||
/// in `classify_sink_class` for SQLAlchemy / similar query-builder
|
||||
/// idioms whose first call returns an opaque builder object.
|
||||
pub db_query_builder_roots: Vec<String>,
|
||||
}
|
||||
|
||||
impl AuthAnalysisRules {
|
||||
|
|
@ -80,6 +87,7 @@ impl AuthAnalysisRules {
|
|||
outbound_network_receiver_prefixes: Vec::new(),
|
||||
cache_receiver_prefixes: Vec::new(),
|
||||
acl_tables: Vec::new(),
|
||||
db_query_builder_roots: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -96,11 +104,13 @@ impl AuthAnalysisRules {
|
|||
}
|
||||
|
||||
/// Does `ty` (last path segment, case-sensitive) match a
|
||||
/// non-sink receiver type? The angle-bracket generic suffix is
|
||||
/// stripped first: `HashMap<i64, String>` → `HashMap`.
|
||||
/// non-sink receiver type? Generic suffixes are stripped first:
|
||||
/// `HashMap<i64, String>` → `HashMap` (Rust/Java/TS angle brackets),
|
||||
/// `set[int]` / `dict[str, int]` → `set` / `dict` (Python PEP 585
|
||||
/// builtin generics + `typing` aliases).
|
||||
pub fn is_non_sink_receiver_type(&self, ty: &str) -> bool {
|
||||
let base = Self::type_last_segment(ty);
|
||||
let base = base.split('<').next().unwrap_or(base).trim();
|
||||
let base = base.split(['<', '[']).next().unwrap_or(base).trim();
|
||||
self.non_sink_receiver_types
|
||||
.iter()
|
||||
.any(|allowed| allowed == base)
|
||||
|
|
@ -115,25 +125,35 @@ impl AuthAnalysisRules {
|
|||
/// The callee string may use either `::` or `.` as the path
|
||||
/// separator (nyx's `callee_name` normalizes both via
|
||||
/// `member_chain`).
|
||||
///
|
||||
/// Bare-callee form: Python uses `set()` / `dict()` / `list()` /
|
||||
/// `defaultdict()` / etc. as direct constructors with no method
|
||||
/// segment. When `callee` has no `.` / `::` separator and matches
|
||||
/// a registered non-sink receiver type, treat the call as a
|
||||
/// non-sink constructor. Closes the
|
||||
/// `verified_ids = set(); verified_ids.update(myteams)` shape in
|
||||
/// sentry where the bare-call form was unrecognised so the bound
|
||||
/// var was missing from `non_sink_vars` and the later
|
||||
/// `.update(..)` classified as DbMutation.
|
||||
pub fn is_non_sink_constructor_callee(&self, callee: &str) -> bool {
|
||||
let normalized = callee.replace("::", ".");
|
||||
let Some((ty, method)) = normalized.rsplit_once('.') else {
|
||||
return false;
|
||||
};
|
||||
if !self.is_non_sink_receiver_type(ty) {
|
||||
return false;
|
||||
if let Some((ty, method)) = normalized.rsplit_once('.') {
|
||||
if !self.is_non_sink_receiver_type(ty) {
|
||||
return false;
|
||||
}
|
||||
return matches!(
|
||||
method,
|
||||
"new"
|
||||
| "with_capacity"
|
||||
| "with_capacity_and_hasher"
|
||||
| "with_hasher"
|
||||
| "from"
|
||||
| "from_iter"
|
||||
| "new_in"
|
||||
| "default"
|
||||
);
|
||||
}
|
||||
matches!(
|
||||
method,
|
||||
"new"
|
||||
| "with_capacity"
|
||||
| "with_capacity_and_hasher"
|
||||
| "with_hasher"
|
||||
| "from"
|
||||
| "from_iter"
|
||||
| "new_in"
|
||||
| "default"
|
||||
)
|
||||
self.is_non_sink_receiver_type(&normalized)
|
||||
}
|
||||
|
||||
/// Does the first segment of a callee receiver chain look like a
|
||||
|
|
@ -260,20 +280,45 @@ impl AuthAnalysisRules {
|
|||
// Verb-name fallback (`is_mutation` / `is_read`) is the loosest
|
||||
// dispatch: it prefix-matches the bare method name against
|
||||
// generic verbs (`Get`, `Save`, `Find`, …) regardless of the
|
||||
// receiver. When the receiver chain itself contains a call
|
||||
// expression (`w.Header().Get(..)`, `r.URL.Query().Get(..)`,
|
||||
// `db.Tx(..).Query(..)`), the receiver is the *return value of
|
||||
// another call*, its type is opaque to the auth analyser and
|
||||
// the bare verb match is too speculative to assume a data-layer
|
||||
// sink. The realtime/outbound/cache prefix dispatches above
|
||||
// already match by the chain root; if none of them claimed the
|
||||
// receiver, dropping the verb-name fallback for chained-call
|
||||
// shapes prevents the entire `w.Header().Get` /
|
||||
// `r.URL.Query().Get` cluster from masquerading as a
|
||||
// `DbCrossTenantRead`. A canonical data-layer call still has a
|
||||
// bare-identifier receiver (`repo.Find(id)`, `db.Query(..)`)
|
||||
// and is unaffected.
|
||||
if !receiver_is_chained_call(callee) {
|
||||
// receiver. Two structural shapes lack the receiver evidence
|
||||
// needed to anchor a DB-sink classification and are excluded:
|
||||
//
|
||||
// 1. Chained-call receiver (`w.Header().Get(..)`,
|
||||
// `r.URL.Query().Get(..)`, `db.Tx(..).Query(..)`) — the
|
||||
// receiver is the *return value of another call*, its type
|
||||
// is opaque to the auth analyser.
|
||||
// 2. Bare-identifier callee with no receiver dot at all
|
||||
// (`list(..)`, `filter(..)`, `create_audit_entry(..)`,
|
||||
// `update_coding_agent_state(..)`) — Python / JS / Ruby
|
||||
// builtins and locally-defined helpers routinely collide
|
||||
// with the verb vocabulary. Real ORM / DB calls always
|
||||
// carry a receiver (`User.find(id)`, `Model.objects.filter`,
|
||||
// `repo.save(x)`); a bare `list(events)` is the Python
|
||||
// builtin and `filter(fn, xs)` is `Iterable.filter`.
|
||||
//
|
||||
// The realtime / outbound / cache prefix dispatches above
|
||||
// already match by the chain root; gating the verb fallback on
|
||||
// a simple non-chained receiver dot prevents both shapes from
|
||||
// masquerading as data-layer sinks while leaving canonical
|
||||
// `repo.Find(id)` / `db.Query(..)` calls unaffected.
|
||||
if receiver_is_simple_chain(callee) {
|
||||
if self.is_mutation(callee) {
|
||||
return Some(SinkClass::DbMutation);
|
||||
}
|
||||
if self.is_read(callee) {
|
||||
return Some(SinkClass::DbCrossTenantRead);
|
||||
}
|
||||
}
|
||||
// SQLAlchemy / query-builder chained shapes:
|
||||
// `select(X).filter_by(...)`, `query(X).filter(...)`,
|
||||
// `select().join().where()`. The chain receiver is the return
|
||||
// value of an opaque builder primitive that the type tracker
|
||||
// cannot follow, but the chain *root* segment is itself a known
|
||||
// DB query-builder verb — strong enough evidence to anchor a
|
||||
// DB-sink classification when paired with a mutation/read verb
|
||||
// on the trailing method. Closes airflow-style
|
||||
// `session.scalar(select(C).filter_by(conn_id=user_input))`.
|
||||
if receiver_is_chained_call(callee) && self.chain_root_is_db_query_builder(callee) {
|
||||
if self.is_mutation(callee) {
|
||||
return Some(SinkClass::DbMutation);
|
||||
}
|
||||
|
|
@ -284,6 +329,42 @@ impl AuthAnalysisRules {
|
|||
None
|
||||
}
|
||||
|
||||
/// True when any non-final segment of the chain is an
|
||||
/// intermediate-call (ends with `()`) whose verb matches a
|
||||
/// configured `db_query_builder_roots` entry. Used to anchor
|
||||
/// chained-call shapes like `select(X).filter_by(id=...)` (Python)
|
||||
/// or `query(X).filter(...)` to a DB-sink classification despite
|
||||
/// the opaque builder return value.
|
||||
pub fn chain_root_is_db_query_builder(&self, callee: &str) -> bool {
|
||||
if self.db_query_builder_roots.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let segments: Vec<&str> = callee.split('.').collect();
|
||||
if segments.len() < 2 {
|
||||
return false;
|
||||
}
|
||||
for seg in &segments[..segments.len() - 1] {
|
||||
if !seg.ends_with(')') {
|
||||
continue;
|
||||
}
|
||||
let stripped = seg
|
||||
.trim_end_matches(')')
|
||||
.trim_end_matches('(')
|
||||
.trim_end_matches(')');
|
||||
if stripped.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if self
|
||||
.db_query_builder_roots
|
||||
.iter()
|
||||
.any(|root| matches_name(stripped, root))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn requires_admin_path(&self, path: &str) -> bool {
|
||||
let lower = path.to_ascii_lowercase();
|
||||
let normalized = if lower.starts_with('/') {
|
||||
|
|
@ -583,7 +664,29 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"invitedemail".into(),
|
||||
"recipient".into(),
|
||||
],
|
||||
non_sink_receiver_types: Vec::new(),
|
||||
// Python builtin / `collections` non-sink container types.
|
||||
// Recognised both as type-annotation hints (`x: set[int]`)
|
||||
// and as bare-callee constructor forms (`x = set()`,
|
||||
// `cache = collections.defaultdict(list)`, …). Method
|
||||
// calls on bound vars (`x.update`, `x.add`, `cache.pop`)
|
||||
// are then classified as `InMemoryLocal`, suppressing the
|
||||
// false `DbMutation` / `DbCrossTenantRead` sink shape.
|
||||
// Closes sentry `api/helpers/teams.py:46` shape where
|
||||
// `verified_ids = set(); verified_ids.update(myteams)` was
|
||||
// flagged as cross-tenant mutation.
|
||||
non_sink_receiver_types: vec![
|
||||
"set".into(),
|
||||
"dict".into(),
|
||||
"list".into(),
|
||||
"tuple".into(),
|
||||
"frozenset".into(),
|
||||
"defaultdict".into(),
|
||||
"OrderedDict".into(),
|
||||
"Counter".into(),
|
||||
"deque".into(),
|
||||
"ChainMap".into(),
|
||||
"namedtuple".into(),
|
||||
],
|
||||
non_sink_receiver_name_prefixes: Vec::new(),
|
||||
non_sink_global_receivers: Vec::new(),
|
||||
non_sink_method_names: Vec::new(),
|
||||
|
|
@ -591,6 +694,12 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
outbound_network_receiver_prefixes: Vec::new(),
|
||||
cache_receiver_prefixes: Vec::new(),
|
||||
acl_tables: Vec::new(),
|
||||
// SQLAlchemy queryset builders. `select(X).filter_by(id=...)`
|
||||
// / `query(X).filter(id=...)` chains return opaque builder
|
||||
// objects whose type the auth analyser cannot follow; the
|
||||
// chain *root* primitive itself is the DB-anchor evidence.
|
||||
// Closes airflow-style `session.scalar(select(C).filter_by(...))`.
|
||||
db_query_builder_roots: vec!["select".into(), "query".into()],
|
||||
}
|
||||
} else if matches!(lang_slug, "ruby") {
|
||||
AuthAnalysisRules {
|
||||
|
|
@ -766,6 +875,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
outbound_network_receiver_prefixes: Vec::new(),
|
||||
cache_receiver_prefixes: Vec::new(),
|
||||
acl_tables: Vec::new(),
|
||||
db_query_builder_roots: Vec::new(),
|
||||
}
|
||||
} else if matches!(lang_slug, "go") {
|
||||
AuthAnalysisRules {
|
||||
|
|
@ -862,6 +972,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
outbound_network_receiver_prefixes: Vec::new(),
|
||||
cache_receiver_prefixes: Vec::new(),
|
||||
acl_tables: Vec::new(),
|
||||
db_query_builder_roots: Vec::new(),
|
||||
}
|
||||
} else if matches!(lang_slug, "java") {
|
||||
AuthAnalysisRules {
|
||||
|
|
@ -954,6 +1065,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
outbound_network_receiver_prefixes: Vec::new(),
|
||||
cache_receiver_prefixes: Vec::new(),
|
||||
acl_tables: Vec::new(),
|
||||
db_query_builder_roots: Vec::new(),
|
||||
}
|
||||
} else if matches!(lang_slug, "rust") {
|
||||
AuthAnalysisRules {
|
||||
|
|
@ -1137,6 +1249,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"members".into(),
|
||||
"share_grants".into(),
|
||||
],
|
||||
db_query_builder_roots: Vec::new(),
|
||||
}
|
||||
} else {
|
||||
AuthAnalysisRules {
|
||||
|
|
@ -1290,6 +1403,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
outbound_network_receiver_prefixes: Vec::new(),
|
||||
cache_receiver_prefixes: Vec::new(),
|
||||
acl_tables: Vec::new(),
|
||||
db_query_builder_roots: Vec::new(),
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -1367,6 +1481,10 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
&lang_cfg.auth.cache_receiver_prefixes,
|
||||
);
|
||||
extend_unique(&mut rules.acl_tables, &lang_cfg.auth.acl_tables);
|
||||
extend_unique(
|
||||
&mut rules.db_query_builder_roots,
|
||||
&lang_cfg.auth.db_query_builder_roots,
|
||||
);
|
||||
}
|
||||
|
||||
rules
|
||||
|
|
@ -1410,6 +1528,17 @@ pub fn receiver_is_chained_call(callee: &str) -> bool {
|
|||
receiver.contains('(')
|
||||
}
|
||||
|
||||
/// True when the callee has a non-chained receiver dot, i.e. an actual
|
||||
/// receiver identifier or path (`User.find`, `repo.save`,
|
||||
/// `Model.objects.filter`). Returns false for bare-identifier callees
|
||||
/// (`list(..)`, `filter(..)`, `create_audit_entry(..)`) and for
|
||||
/// chained-call receivers (`db.Tx(..).Query(..)`) — both lack the
|
||||
/// receiver evidence needed to anchor a DB-sink classification, see
|
||||
/// the comment in `classify_sink_class`.
|
||||
pub fn receiver_is_simple_chain(callee: &str) -> bool {
|
||||
callee.contains('.') && !receiver_is_chained_call(callee)
|
||||
}
|
||||
|
||||
/// Recognise `require_<resource>_<role>` / `ensure_<resource>_<role>`
|
||||
/// shapes where `<role>` is a closed-vocabulary authorization noun
|
||||
/// (`member`, `owner`, `admin`, `access`, `permission`, `manager`,
|
||||
|
|
@ -1768,6 +1897,161 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Pins the bare-identifier gate on the verb fallback of
/// `classify_sink_class`.
///
/// For every language slug listed below, a callee with no receiver dot
/// (`list`, `filter`, `update`, `create_audit_entry`,
/// `update_coding_agent_state`) must classify as `None`, even though
/// its name starts with a read / mutation verb.
///
/// The second half is the recall guard: receiver-qualified calls
/// (`Project.objects.filter`, `Project.objects.update`, `repo.Find`)
/// must still classify as DB reads / mutations.
#[test]
fn classify_sink_class_suppresses_bare_callee_verb_fallback() {
    use crate::auth_analysis::model::SinkClass;
    use std::collections::HashSet;

    const LANGS: [&str; 7] = [
        "python",
        "javascript",
        "typescript",
        "go",
        "java",
        "ruby",
        "rust",
    ];
    const BARE_CALLEES: [&str; 5] = [
        "list",
        "filter",
        "update",
        "create_audit_entry",
        "update_coding_agent_state",
    ];

    let no_locals: HashSet<String> = HashSet::new();

    for lang in LANGS {
        let defaults = Config::default();
        let rules = build_auth_rules(&defaults, lang);
        // Bare callees that prefix-match a read / mutation indicator
        // must NOT classify as DbCrossTenantRead / DbMutation.
        for callee in BARE_CALLEES {
            assert_eq!(
                rules.classify_sink_class(callee, &no_locals),
                None,
                "lang={lang} bare {callee}",
            );
        }
    }

    // Recall guard: qualified ORM / DB calls keep classifying.
    let python = build_auth_rules(&Config::default(), "python");
    let golang = build_auth_rules(&Config::default(), "go");
    let qualified = [
        (&python, "Project.objects.filter", SinkClass::DbCrossTenantRead),
        (&python, "Project.objects.update", SinkClass::DbMutation),
        (&golang, "repo.Find", SinkClass::DbCrossTenantRead),
    ];
    for (rules, callee, expected) in qualified {
        assert_eq!(rules.classify_sink_class(callee, &no_locals), Some(expected));
    }
}
|
||||
|
||||
/// Pins the query-builder chained-call recogniser for the Python rules.
///
/// Covers, in order:
/// 1. detection: chains containing an intermediate `select()` /
///    `query()` call are recognised by `chain_root_is_db_query_builder`;
/// 2. rejection: chains with other intermediate calls, and plain
///    receiver chains with no intermediate call, are not;
/// 3. classification: recognised chains classify as a DB read or
///    mutation according to the trailing verb;
/// 4. regression guard: non-builder chained calls stay `None`;
/// 5. the other language defaults neither recognise nor classify
///    `select().filter_by`.
#[test]
fn chain_root_is_db_query_builder_recognises_sqlalchemy_chains() {
    use crate::auth_analysis::model::SinkClass;
    use std::collections::HashSet;

    let defaults = Config::default();
    let py_rules = build_auth_rules(&defaults, "python");
    let empty: HashSet<String> = HashSet::new();

    // Thin wrappers so each case below stays a one-line assertion.
    let is_builder = |chain: &str| py_rules.chain_root_is_db_query_builder(chain);
    let classify = |callee: &str| py_rules.classify_sink_class(callee, &empty);

    // Detection: an intermediate `select()` / `query()` call matches
    // the configured Python `db_query_builder_roots`.
    assert!(is_builder("select().filter_by"));
    assert!(is_builder("query().filter"));
    assert!(is_builder("Session.query().filter"));
    assert!(is_builder("select().join().where"));

    // Non-builder intermediate calls: must not match.
    assert!(!is_builder("w.Header().Get"));
    assert!(!is_builder("obj.foo().bar"));

    // Plain receiver chains (no intermediate call) are out of scope
    // for this predicate.
    assert!(!is_builder("repo.Find"));
    assert!(!is_builder("Project.objects.filter"));

    // Classification: builder chains anchor to a DB sink class when the
    // trailing verb matches.
    assert_eq!(
        classify("select().filter_by"),
        Some(SinkClass::DbCrossTenantRead)
    );
    assert_eq!(classify("query().delete"), Some(SinkClass::DbMutation));
    assert_eq!(classify("select().update"), Some(SinkClass::DbMutation));

    // Regression guard: chained calls that are NOT DB builders remain
    // unclassified even though the trailing verb is `get`.
    assert_eq!(classify("w.Header().get"), None);
    assert_eq!(classify("obj.foo().get"), None);

    // Languages without `db_query_builder_roots` defaults must not
    // recognise or classify the chained-call shape.
    const OTHER_LANGS: [&str; 6] = ["javascript", "typescript", "go", "java", "ruby", "rust"];
    for lang in OTHER_LANGS {
        let rules = build_auth_rules(&Config::default(), lang);
        let recognised = rules.chain_root_is_db_query_builder("select().filter_by");
        assert!(
            !recognised,
            "lang={lang} unexpectedly classified select().filter_by as DB-builder chain",
        );
        let class = rules.classify_sink_class("select().filter_by", &empty);
        assert_eq!(
            class, None,
            "lang={lang} unexpectedly classified select().filter_by as DB sink",
        );
    }
}
|
||||
|
||||
/// Pins `receiver_is_simple_chain` on its three input shapes: dotted
/// receiver paths (accepted), bare identifiers (rejected) and
/// chained-call receivers (rejected).
#[test]
fn receiver_is_simple_chain_classifies_correctly() {
    use super::receiver_is_simple_chain as is_simple;

    // Dotted receiver path: eligible for the verb fallback.
    assert!(is_simple("repo.Find"));
    assert!(is_simple("Project.objects.filter"));
    assert!(is_simple("self.cache.insert"));

    // Bare identifier: no receiver, so not eligible.
    assert!(!is_simple("list"));
    assert!(!is_simple("filter"));
    assert!(!is_simple("create_audit_entry"));

    // Receiver is itself the result of a call: not eligible.
    assert!(!is_simple("w.Header().Get"));
    assert!(!is_simple("db.Tx(opts).Query"));
}
|
||||
|
||||
#[test]
|
||||
fn sink_class_is_auth_relevant_only_for_non_local_classes() {
|
||||
use crate::auth_analysis::model::SinkClass;
|
||||
|
|
@ -1836,6 +2120,97 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Pins the Python non-sink container recogniser.
///
/// Checks three things against the default Python rules:
/// * `is_non_sink_receiver_type` accepts container type names, with or
///   without a subscript (`set`, `set[int]`, `dict[str, int]`), and
///   rejects unrelated names;
/// * `is_non_sink_constructor_callee` accepts the same names as bare
///   constructor callees and rejects verb names;
/// * `classify_sink_class` reports `InMemoryLocal` for a method call on
///   a variable registered as non-sink, while a qualified ORM call on
///   the same verb is still `DbMutation`.
#[test]
fn python_non_sink_container_recognition() {
    use crate::auth_analysis::model::SinkClass;
    use std::collections::HashSet;

    let defaults = Config::default();
    let rules = build_auth_rules(&defaults, "python");

    let is_type = |ty: &str| rules.is_non_sink_receiver_type(ty);
    let is_ctor = |callee: &str| rules.is_non_sink_constructor_callee(callee);
    let classify =
        |callee: &str, locals: &HashSet<String>| rules.classify_sink_class(callee, locals);

    // Type annotations: bare and subscripted container names.
    assert!(is_type("set"));
    assert!(is_type("set[int]"));
    assert!(is_type("dict[str, int]"));
    assert!(is_type("list[str]"));
    assert!(is_type("defaultdict"));
    assert!(is_type("Counter"));
    assert!(is_type("OrderedDict"));
    // Negative: arbitrary type names must not match.
    assert!(!is_type("Project"));
    assert!(!is_type("QuerySet"));

    // Bare-callee constructor form: `set()`, `dict()`, ...
    assert!(is_ctor("set"));
    assert!(is_ctor("dict"));
    assert!(is_ctor("list"));
    assert!(is_ctor("frozenset"));
    assert!(is_ctor("defaultdict"));
    assert!(is_ctor("Counter"));
    // Negative: verb names and unrelated type names are not container
    // constructors.
    assert!(!is_ctor("update"));
    assert!(!is_ctor("filter"));
    assert!(!is_ctor("find"));
    assert!(!is_ctor("Project"));

    // End-to-end: a method call on a registered non-sink variable is
    // classified as `InMemoryLocal`.
    let non_sink_vars: HashSet<String> = ["verified_ids", "requested_teams"]
        .into_iter()
        .map(String::from)
        .collect();
    assert_eq!(
        classify("verified_ids.update", &non_sink_vars),
        Some(SinkClass::InMemoryLocal)
    );
    assert_eq!(
        classify("requested_teams.add", &non_sink_vars),
        Some(SinkClass::InMemoryLocal)
    );

    // Recall guard: with no registered variables, a qualified ORM call
    // on the same verb is still a `DbMutation`.
    let empty: HashSet<String> = HashSet::new();
    assert_eq!(
        classify("Project.objects.update", &empty),
        Some(SinkClass::DbMutation)
    );
}
|
||||
|
||||
/// Cross-language guard: the default rules for every non-Python
/// language listed below must not treat bare `set`, `dict` or `list` as
/// a non-sink receiver type.
#[test]
fn python_container_types_do_not_leak_to_other_languages() {
    const OTHER_LANGS: [&str; 6] = ["javascript", "typescript", "go", "java", "ruby", "rust"];
    const CONTAINER_NAMES: [&str; 3] = ["set", "dict", "list"];

    let defaults = Config::default();
    for lang in OTHER_LANGS {
        let rules = build_auth_rules(&defaults, lang);
        for ty in CONTAINER_NAMES {
            assert!(
                !rules.is_non_sink_receiver_type(ty),
                "lang={lang} unexpectedly recognises bare `{ty}` as non-sink type",
            );
        }
    }
}
|
||||
|
||||
/// `require_<resource>_<role>` structural recogniser for project
|
||||
/// helpers like `require_trip_member`, `require_doc_owner`.
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -4,8 +4,7 @@ use super::axum::{
|
|||
expanded_guard_call_sites, guard_calls_for_handler, inject_guard_checks, rust_param_aliases,
|
||||
};
|
||||
use super::common::{
|
||||
attach_route_handler, call_name, collect_top_level_units, named_children, resolve_handler_node,
|
||||
string_literal_value,
|
||||
attach_route_handler, call_name, named_children, resolve_handler_node, string_literal_value,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{
|
||||
|
|
@ -30,21 +29,11 @@ impl AuthExtractor for ActixWebExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
collect_routes(root, root, bytes, path, rules, &mut model);
|
||||
apply_typed_extractor_guards_to_units(
|
||||
root,
|
||||
bytes,
|
||||
rules,
|
||||
&mut model,
|
||||
GuardFramework::ActixWeb,
|
||||
);
|
||||
|
||||
model
|
||||
collect_routes(root, root, bytes, path, rules, model);
|
||||
apply_typed_extractor_guards_to_units(root, bytes, rules, model, GuardFramework::ActixWeb);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
attach_route_handler, call_name, call_site_from_node, call_sites_from_value,
|
||||
collect_top_level_units, function_definition_node, named_children, resolve_handler_node,
|
||||
string_literal_value, text,
|
||||
function_definition_node, named_children, resolve_handler_node, string_literal_value, text,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{
|
||||
|
|
@ -29,15 +28,11 @@ impl AuthExtractor for AxumExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
collect_routes(root, root, bytes, path, rules, &mut model);
|
||||
apply_typed_extractor_guards_to_units(root, bytes, rules, &mut model, GuardFramework::Axum);
|
||||
|
||||
model
|
||||
collect_routes(root, root, bytes, path, rules, model);
|
||||
apply_typed_extractor_guards_to_units(root, bytes, rules, model, GuardFramework::Axum);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -896,6 +896,13 @@ fn collect_unit_state(
|
|||
// `instance_variable`.
|
||||
if matches!(node.kind(), "assignment" | "assignment_expression") {
|
||||
collect_row_population(node, bytes, state);
|
||||
// Python `verified_ids = set()` /
|
||||
// `cache: dict[str,int] = {}` and JS analogues bind a
|
||||
// local non-sink container. `collect_non_sink_binding`
|
||||
// accepts both `pattern`/`value` and `left`/`right`
|
||||
// field names so the same recognition path covers
|
||||
// these assignment-node shapes.
|
||||
collect_non_sink_binding(node, bytes, rules, state);
|
||||
}
|
||||
}
|
||||
"for_expression" => {
|
||||
|
|
@ -915,9 +922,27 @@ fn collect_unit_state(
|
|||
_ => {}
|
||||
}
|
||||
|
||||
for value in extract_value_refs(node, bytes) {
|
||||
state.value_refs.push(value);
|
||||
}
|
||||
// O(1) per-node shallow value-ref emission, then descend.
|
||||
//
|
||||
// Pre-fix this site called `extract_value_refs(node, bytes)` which walks
|
||||
// node's entire subtree. Combined with the recursion below — which
|
||||
// visits every descendant and re-runs the same call at each level — the
|
||||
// total work was O(N * subtree_size) ≈ O(N²) per function body. On
|
||||
// mm/channels/app the inner-walk dominated `build_function_unit_with_meta`
|
||||
// and its descendants (~17%+15%+11% of total wall-clock split across
|
||||
// `build_function_unit_with_meta`, `collect_unit_state`, and
|
||||
// `extract_value_refs` in the post-shared-model profile, 2026-05-04).
|
||||
//
|
||||
// The recursion below already visits every descendant once. Emitting a
|
||||
// shallow value-ref per node — only the ref the node itself represents —
|
||||
// produces the same SET of value-refs after `dedup_value_refs` runs in
|
||||
// `build_function_unit_with_meta`, because every ref-emitting kind
|
||||
// (member chain, subscript, accessor call, identifier) is reachable as a
|
||||
// single node visit. Public callers of `extract_value_refs` (e.g.
|
||||
// `collect_call`, `collect_condition`, assignment-side extraction) keep
|
||||
// the deep walk: they intentionally want refs from the full subtree
|
||||
// rooted at the argument they pass.
|
||||
append_shallow_value_ref(node, bytes, &mut state.value_refs);
|
||||
|
||||
for idx in 0..node.named_child_count() {
|
||||
let Some(child) = node.named_child(idx as u32) else {
|
||||
|
|
@ -927,6 +952,57 @@ fn collect_unit_state(
|
|||
}
|
||||
}
|
||||
|
||||
/// Per-node value-ref emission used inside `collect_unit_state`'s tree walk.
|
||||
///
|
||||
/// Returns the value-ref the node itself represents (a member chain, a
|
||||
/// subscript, an accessor call's chain, or an identifier-like leaf), without
|
||||
/// descending into descendants. The caller's existing AST recursion handles
|
||||
/// children; relying on that recursion turns the previously O(N²) per-body
|
||||
/// walk into O(N).
|
||||
fn append_shallow_value_ref(node: Node<'_>, bytes: &[u8], refs: &mut Vec<ValueRef>) {
|
||||
match node.kind() {
|
||||
"member_expression"
|
||||
| "attribute"
|
||||
| "selector_expression"
|
||||
| "field_expression"
|
||||
| "field_access" => {
|
||||
if let Some(value) = member_value_ref(node, bytes) {
|
||||
refs.push(value);
|
||||
}
|
||||
}
|
||||
"subscript_expression" | "subscript" | "element_reference" | "index_expression" => {
|
||||
if let Some(value) = subscript_value_ref(node, bytes) {
|
||||
refs.push(value);
|
||||
}
|
||||
}
|
||||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||||
// Accessor-call chains (`cache.get(key)`, `req.params.id`) absorb
|
||||
// into a single chain ValueRef; non-accessor calls return None
|
||||
// here and rely on recursion to visit `function` + arg children
|
||||
// so each leaf identifier emits its own ref.
|
||||
if let Some(value) = call_value_ref(node, bytes) {
|
||||
refs.push(value);
|
||||
}
|
||||
}
|
||||
// Bare identifier and Ruby `@foo` / `@@foo` / `$foo` leaves: emit a
|
||||
// single Identifier-kind ValueRef. Mirrors `extract_value_refs`'s
|
||||
// identifier arm so `dedup_value_refs` collapses any cross-path
|
||||
// duplicates against existing emissions from sibling deep walks
|
||||
// (e.g. `collect_condition`'s `extract_value_refs(condition)`).
|
||||
"identifier" | "instance_variable" | "class_variable" | "global_variable" => {
|
||||
refs.push(ValueRef {
|
||||
source_kind: ValueSourceKind::Identifier,
|
||||
name: text(node, bytes),
|
||||
base: None,
|
||||
field: None,
|
||||
index: None,
|
||||
span: span(node),
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_call(node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState) {
|
||||
let callee = call_name(node, bytes);
|
||||
if callee.is_empty() {
|
||||
|
|
@ -1059,22 +1135,28 @@ fn collect_condition(
|
|||
}
|
||||
}
|
||||
|
||||
/// Detect `let` bindings that produce a known non-sink collection
|
||||
/// (e.g. `HashMap::new()`, `Vec::with_capacity(_)`, `vec![]`, or an
|
||||
/// explicit type annotation like `: HashMap<_, _>`). Registered
|
||||
/// variable names are consulted by `collect_call` so later method
|
||||
/// calls on those bindings (`map.insert(..)`, `set.remove(..)`)
|
||||
/// aren't treated as auth-relevant Read/Mutation operations.
|
||||
/// Detect bindings that produce a known non-sink collection
|
||||
/// (e.g. `HashMap::new()`, `Vec::with_capacity(_)`, `vec![]`, an
|
||||
/// explicit type annotation like `: HashMap<_, _>`, or Python's
|
||||
/// bare `set()` / `dict()` / `collections.defaultdict(list)`).
|
||||
/// Registered variable names are consulted by `collect_call` so
|
||||
/// later method calls on those bindings (`map.insert(..)`,
|
||||
/// `set.remove(..)`, `verified_ids.update(..)`) aren't treated as
|
||||
/// auth-relevant Read/Mutation operations.
|
||||
///
|
||||
/// Rust-oriented in practice; JS/TS/Python/etc. use different
|
||||
/// declaration node kinds and are unaffected.
|
||||
/// Field names accepted: Rust `let_declaration` uses `pattern` /
|
||||
/// `value`; Python `assignment` and JS `assignment_expression` use
|
||||
/// `left` / `right`. Both shapes share the same recognition path.
|
||||
fn collect_non_sink_binding(
|
||||
node: Node<'_>,
|
||||
bytes: &[u8],
|
||||
rules: &AuthAnalysisRules,
|
||||
state: &mut UnitState,
|
||||
) {
|
||||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||||
let Some(pattern) = node
|
||||
.child_by_field_name("pattern")
|
||||
.or_else(|| node.child_by_field_name("left"))
|
||||
else {
|
||||
return;
|
||||
};
|
||||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||||
|
|
@ -1092,7 +1174,9 @@ fn collect_non_sink_binding(
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(value) = node.child_by_field_name("value")
|
||||
if let Some(value) = node
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| node.child_by_field_name("right"))
|
||||
&& value_is_non_sink_constructor(value, bytes, rules)
|
||||
{
|
||||
state.non_sink_vars.insert(var_name);
|
||||
|
|
@ -3457,18 +3541,53 @@ fn collect_param_names(
|
|||
"parameter_declaration" | "variadic_parameter_declaration"
|
||||
if node.child_by_field_name("name").is_some() =>
|
||||
{
|
||||
if let Some(type_node) = node.child_by_field_name("type")
|
||||
&& is_go_non_user_input_type(type_node, bytes)
|
||||
let type_node = node.child_by_field_name("type");
|
||||
if let Some(t) = type_node
|
||||
&& is_go_non_user_input_type(t, bytes)
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Mirror of the Python `typed_parameter` filter (see
|
||||
// `is_python_id_like_typed_param` arm above): for non-route
|
||||
// units, an id-like Go param whose declared type is a
|
||||
// bounded primitive scalar (`int64`, `uint32`, `string`,
|
||||
// `bool`, `byte`, `rune`, `float64`, …) is a caller-passed
|
||||
// scope identifier, not user-controlled HTTP input. Real
|
||||
// Go HTTP handlers always carry a framework-request-typed
|
||||
// param (`*http.Request`, `*gin.Context`, `echo.Context`,
|
||||
// `*fiber.Ctx`, `*context.APIContext`, …) and are
|
||||
// recognised by the per-framework route extractors which
|
||||
// call `function_params_route_handler`
|
||||
// (`include_id_like_typed = true`) — those bypass this
|
||||
// filter so id-shaped path params survive on real routes.
|
||||
//
|
||||
// Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~957
|
||||
// `go.auth.missing_ownership_check` findings on backend
|
||||
// helpers like
|
||||
// `func GetRunByRepoAndID(ctx context.Context,
|
||||
// repoID, runID int64)`,
|
||||
// `func DeleteRunner(ctx context.Context, id int64)`,
|
||||
// and the entire `models/...` DAO layer where the
|
||||
// ownership check sits in the calling route handler.
|
||||
// Same shape over-fires on minio's `cmd/iam-*-store`
|
||||
// helpers and would on every Go ORM/DAO codebase.
|
||||
let type_is_bounded_scalar = type_node
|
||||
.map(|t| is_go_bounded_scalar_type(t, bytes))
|
||||
.unwrap_or(false);
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children_by_field_name("name", &mut cursor) {
|
||||
if child.kind() == "identifier" {
|
||||
let name = text(child, bytes);
|
||||
if !name.is_empty() && !out.contains(&name) {
|
||||
out.push(name);
|
||||
if name.is_empty() || out.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
if !include_id_like_typed
|
||||
&& type_is_bounded_scalar
|
||||
&& is_go_id_like_typed_param(&name)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
out.push(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -3635,6 +3754,56 @@ fn is_python_id_like_typed_param(name: &str) -> bool {
|
|||
lower == "id" || lower.ends_with("id") || lower.ends_with("_id") || lower.ends_with("ids")
|
||||
}
|
||||
|
||||
/// Reports whether a Go parameter name looks like an identifier field.
///
/// The comparison is ASCII case-insensitive and purely suffix-based:
/// the name matches when its lowercased form ends in `id` or `ids`.
/// That covers the exact name `id` as well as `_id`, `ID` and `Ids`
/// spellings (`repoID`, `run_id`, `userIds`). Because only the suffix
/// is inspected, ordinary words ending in `id` (for example `valid`)
/// match too.
///
/// Kept as a separate function from `is_python_id_like_typed_param`,
/// whose predicate is currently the same.
fn is_go_id_like_typed_param(name: &str) -> bool {
    const ID_SUFFIXES: [&str; 2] = ["id", "ids"];

    let folded = name.to_ascii_lowercase();
    ID_SUFFIXES.iter().any(|suffix| folded.ends_with(suffix))
}
|
||||
|
||||
/// True iff `type_node` names a Go bounded primitive scalar:
|
||||
/// integer (`int*` / `uint*` / `byte` / `rune` / `uintptr`), floating
|
||||
/// point (`float32` / `float64`), `bool`, or `string`. Used by the
|
||||
/// Go arm of `collect_param_names` to recognise the
|
||||
/// "id-like name + scalar type" DAO-helper shape and refuse to lift
|
||||
/// such params into `unit.params` for non-route units.
|
||||
///
|
||||
/// Conservative scope: only bare `type_identifier` matches. Pointer
|
||||
/// types (`*Foo`), generic types (`Map[K, V]`), qualified types
|
||||
/// (`pkg.Type`), and slice/array types (`[]T`) are framework or
|
||||
/// payload shapes, NOT bounded primitives, so they're left alone and
|
||||
/// the param keeps its name. This keeps real handler shapes that
|
||||
/// happen to spell an id-like name on a complex type (`req
|
||||
/// *RequestWithID`) from being silently dropped.
|
||||
fn is_go_bounded_scalar_type(type_node: Node<'_>, bytes: &[u8]) -> bool {
|
||||
if type_node.kind() != "type_identifier" {
|
||||
return false;
|
||||
}
|
||||
matches!(
|
||||
text(type_node, bytes).as_str(),
|
||||
"int"
|
||||
| "int8"
|
||||
| "int16"
|
||||
| "int32"
|
||||
| "int64"
|
||||
| "uint"
|
||||
| "uint8"
|
||||
| "uint16"
|
||||
| "uint32"
|
||||
| "uint64"
|
||||
| "uintptr"
|
||||
| "byte"
|
||||
| "rune"
|
||||
| "float32"
|
||||
| "float64"
|
||||
| "bool"
|
||||
| "string"
|
||||
)
|
||||
}
|
||||
|
||||
pub fn is_function_like(node: Node<'_>) -> bool {
|
||||
matches!(
|
||||
node.kind(),
|
||||
|
|
@ -4080,20 +4249,41 @@ fn subscript_value_ref(node: Node<'_>, bytes: &[u8]) -> Option<ValueRef> {
|
|||
|
||||
pub fn member_chain(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||||
if node.kind() == "call" {
|
||||
let mut chain = if let Some(receiver) = node.child_by_field_name("receiver") {
|
||||
member_chain(receiver, bytes)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
// Ruby-style call: explicit receiver field + method/name field.
|
||||
if let Some(receiver) = node.child_by_field_name("receiver") {
|
||||
let mut chain = member_chain(receiver, bytes);
|
||||
let method = node
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| node.child_by_field_name("name"))
|
||||
.map(|method| text(method, bytes))
|
||||
.unwrap_or_default();
|
||||
if !method.is_empty() {
|
||||
chain.push(method);
|
||||
}
|
||||
return chain;
|
||||
}
|
||||
// Python-style call: callable expression in the `function` field.
|
||||
// Recursing into it lets chained shapes like
|
||||
// `select(X).filter_by(...)` produce `["select()", "filter_by"]`
|
||||
// — the parent attribute branch appends `()` when its `object`
|
||||
// is a call, marking the intermediate-call shape so that
|
||||
// `receiver_is_chained_call` detects it. Closes airflow-style
|
||||
// SQLAlchemy queryset-builder chains that previously reduced to
|
||||
// bare `["filter_by"]`.
|
||||
if let Some(function) = node.child_by_field_name("function") {
|
||||
return member_chain(function, bytes);
|
||||
}
|
||||
// Bare-method fallback for parser shapes that expose method/name
|
||||
// without a receiver (Ruby implicit-self calls, etc.).
|
||||
let method = node
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| node.child_by_field_name("name"))
|
||||
.map(|method| text(method, bytes))
|
||||
.unwrap_or_default();
|
||||
if !method.is_empty() {
|
||||
chain.push(method);
|
||||
return vec![method];
|
||||
}
|
||||
return chain;
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
if node.kind() == "method_invocation" || node.kind() == "method_call_expression" {
|
||||
|
|
@ -4164,7 +4354,23 @@ pub fn member_chain(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
|||
.or_else(|| node.child_by_field_name("operand"))
|
||||
.or_else(|| node.child_by_field_name("argument"))
|
||||
{
|
||||
chain.extend(member_chain(object, bytes));
|
||||
let object_is_call = matches!(
|
||||
object.kind(),
|
||||
"call" | "call_expression" | "method_invocation" | "method_call_expression"
|
||||
);
|
||||
let mut sub = member_chain(object, bytes);
|
||||
// Mark intermediate-call segments with `()` so a downstream
|
||||
// chain like `select(X).filter_by(...)` becomes
|
||||
// `["select()", "filter_by"]` rather than `["select", "filter_by"]`.
|
||||
// `receiver_is_chained_call` consults the `(` to detect the
|
||||
// opaque-builder receiver.
|
||||
if object_is_call
|
||||
&& sub.last().map(|s| !s.ends_with(')')).unwrap_or(false)
|
||||
&& let Some(last) = sub.last_mut()
|
||||
{
|
||||
last.push_str("()");
|
||||
}
|
||||
chain.extend(sub);
|
||||
}
|
||||
if let Some(property) = node
|
||||
.child_by_field_name("property")
|
||||
|
|
@ -4876,6 +5082,200 @@ mod tests {
|
|||
assert!(!params.contains(&"int".to_string()), "got {:?}", params);
|
||||
}
|
||||
|
||||
/// DAO-helper shape (`func GetRunByRepoAndID(ctx context.Context,
|
||||
/// repoID, runID int64)`): id-like names with bounded primitive
|
||||
/// scalar types are caller-passed scope identifiers, NOT user
|
||||
/// input. For non-route units (`function_params`,
|
||||
/// `include_id_like_typed = false`), they must NOT lift into
|
||||
/// `unit.params` — that would gate `unit_has_user_input_evidence`
|
||||
/// open on every internal Go ORM helper and over-fire
|
||||
/// `go.auth.missing_ownership_check`.
|
||||
///
|
||||
/// Real-repo trigger:
|
||||
/// `/Users/elipeter/oss/gitea/models/actions/run_job.go::
|
||||
/// GetRunByRepoAndID` and ~957 sibling helpers across gitea's
|
||||
/// `models/...` DAO layer. Same shape over-fires on minio's
|
||||
/// `cmd/iam-*-store` and is the canonical Go ORM helper signature.
|
||||
#[test]
|
||||
fn collect_param_names_go_drops_id_like_scalar_params_for_dao_helper() {
|
||||
use super::function_params;
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE))
|
||||
.unwrap();
|
||||
let src =
|
||||
b"package x\nfunc GetRunByRepoAndID(ctx context.Context, repoID, runID int64) {}\n";
|
||||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||||
let func = (0..tree.root_node().named_child_count())
|
||||
.filter_map(|i| tree.root_node().named_child(i as u32))
|
||||
.find(|n| n.kind() == "function_declaration")
|
||||
.expect("file should have a function_declaration");
|
||||
let params = function_params(func, src);
|
||||
assert!(
|
||||
!params.contains(&"ctx".to_string()),
|
||||
"context.Context dropped: got {:?}",
|
||||
params
|
||||
);
|
||||
assert!(
|
||||
!params.contains(&"repoID".to_string()),
|
||||
"id-like scalar param dropped for DAO helper: got {:?}",
|
||||
params
|
||||
);
|
||||
assert!(
|
||||
!params.contains(&"runID".to_string()),
|
||||
"id-like scalar param dropped for DAO helper: got {:?}",
|
||||
params
|
||||
);
|
||||
assert!(
|
||||
params.is_empty(),
|
||||
"no params survive on DAO-shape helper: got {:?}",
|
||||
params
|
||||
);
|
||||
}
|
||||
|
||||
/// The id-like drop applies only to **bounded primitive scalar**
/// types. Pointer, struct and slice types are payload shapes that may
/// or may not be user-controlled, so an id-like name on such a type
/// must survive and non-DAO helpers keep their evidence.
#[test]
fn collect_param_names_go_keeps_id_like_pointer_struct_param() {
    use super::function_params;

    // `runnerID *Runner`: the name looks like an id, but the pointer
    // type is a payload shape, so the param must be kept.
    let src = b"package x\nfunc UpdateRunner(ctx context.Context, runnerID *Runner) {}\n";

    let go_grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go_grammar).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .find_map(|idx| {
            root.named_child(idx as u32)
                .filter(|n| n.kind() == "function_declaration")
        })
        .expect("file should have a function_declaration");

    let params = function_params(func, src);
    assert!(
        params.iter().any(|p| p == "runnerID"),
        "id-like pointer param survives: got {:?}",
        params
    );
}
|
||||
|
||||
/// Route handlers are collected through `function_params_route_handler`
/// (`include_id_like_typed = true`), where the id-like-scalar filter
/// must stay off: a path param on a REST route is the primary user
/// input, and middleware-injected auth checks rely on those names being
/// present in `unit.params`.
#[test]
fn collect_param_names_go_route_handler_keeps_id_like_scalar_params() {
    use super::function_params_route_handler;

    let src = b"package x\nfunc GetRepo(ctx context.Context, repoID int64) {}\n";

    let go_grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go_grammar).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .flat_map(|idx| root.named_child(idx as u32))
        .find(|n| n.kind() == "function_declaration")
        .expect("file should have a function_declaration");

    let params = function_params_route_handler(func, src);
    assert!(
        params.iter().any(|p| p == "repoID"),
        "id-like scalar param kept for route handler: got {:?}",
        params
    );
}
|
||||
|
||||
/// Pin `member_chain` output for the SQLAlchemy queryset chain
|
||||
/// `select(C).filter_by(id=x)`. Pre-fix, Python `call` nodes use a
|
||||
/// `function` field (not `receiver`/`method`) so the recursive call
|
||||
/// arm returned an empty Vec, reducing the chain to bare
|
||||
/// `["filter_by"]`. The fix: (1) traverse `function` field in the
|
||||
/// `call` arm; (2) the parent attribute branch appends `()` to last
|
||||
/// segment when its `object` is a call. Together they produce
|
||||
/// `["select()", "filter_by"]` so `receiver_is_chained_call` detects
|
||||
/// the intermediate-call shape.
|
||||
#[test]
|
||||
fn member_chain_python_select_filter_by_chain_marks_intermediate_call() {
|
||||
use super::{callee_name, member_chain};
|
||||
use tree_sitter::{Node, Parser};
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE))
|
||||
.unwrap();
|
||||
let src = b"x = select(C).filter_by(id=u)\n";
|
||||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||||
|
||||
fn find_outer_call<'a>(node: Node<'a>) -> Option<Node<'a>> {
|
||||
if node.kind() == "call"
|
||||
&& let Some(function) = node.child_by_field_name("function")
|
||||
&& function.kind() == "attribute"
|
||||
{
|
||||
return Some(node);
|
||||
}
|
||||
for i in 0..node.named_child_count() {
|
||||
if let Some(child) = node.named_child(i as u32)
|
||||
&& let Some(found) = find_outer_call(child)
|
||||
{
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
let outer_call = find_outer_call(tree.root_node())
|
||||
.expect("expected outer call node `select(C).filter_by(id=u)`");
|
||||
|
||||
assert_eq!(
|
||||
member_chain(outer_call, src),
|
||||
vec!["select()".to_string(), "filter_by".to_string()],
|
||||
"Python chained call must produce `[select(), filter_by]` so receiver_is_chained_call detects the intermediate-call shape",
|
||||
);
|
||||
assert_eq!(
|
||||
callee_name(outer_call, src),
|
||||
"select().filter_by".to_string(),
|
||||
"callee_name joins the chain with `.`",
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression guard for the plain Python `obj.method(arg)` shape: the
/// `function`-field traversal added for chained calls must not attach
/// `()` markers to a non-chained callee, so `callee_name` still reports
/// `obj.method`.
#[test]
fn member_chain_python_simple_attribute_call_unchanged() {
    use super::callee_name;
    use tree_sitter::{Node, Parser};

    /// Pre-order search for the first `call` node at or below `node`.
    fn first_call<'a>(node: Node<'a>) -> Option<Node<'a>> {
        if node.kind() == "call" {
            return Some(node);
        }
        (0..node.named_child_count())
            .filter_map(|idx| node.named_child(idx as u32))
            .find_map(first_call)
    }

    let src = b"x = obj.method(a)\n";

    let python_grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let mut parser = Parser::new();
    parser.set_language(&python_grammar).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let call_node = first_call(tree.root_node()).expect("expected `obj.method(a)` call");
    assert_eq!(
        callee_name(call_node, src),
        "obj.method".to_string(),
        "simple attribute call must not pick up `()` markers",
    );
}
|
||||
|
||||
mod ruby_visibility_and_callbacks {
|
||||
use super::super::{
|
||||
RubyVisibility, ruby_callback_target_names, ruby_method_is_callback_or_private,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use super::common::{
|
|||
string_literal_value, text, visit_named_nodes,
|
||||
};
|
||||
use crate::auth_analysis::config::{AuthAnalysisRules, matches_name};
|
||||
use crate::auth_analysis::extract::common::{attach_route_handler, collect_top_level_units};
|
||||
use crate::auth_analysis::extract::common::attach_route_handler;
|
||||
use crate::auth_analysis::model::{
|
||||
AnalysisUnitKind, AuthorizationModel, CallSite, Framework, HttpMethod,
|
||||
};
|
||||
|
|
@ -29,18 +29,14 @@ impl AuthExtractor for DjangoExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
visit_named_nodes(root, &mut |node| {
|
||||
if node.kind() == "call" {
|
||||
maybe_collect_django_path(root, node, bytes, path, rules, &mut model);
|
||||
maybe_collect_django_path(root, node, bytes, path, rules, model);
|
||||
}
|
||||
});
|
||||
|
||||
model
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
attach_route_handler, call_site_from_node, collect_top_level_units, http_method_from_name,
|
||||
is_handler_reference, join_route_paths, member_target, named_children, push_route_registration,
|
||||
string_literal_value, text, visit_named_nodes,
|
||||
attach_route_handler, call_site_from_node, http_method_from_name, is_handler_reference,
|
||||
join_route_paths, member_target, named_children, push_route_registration, string_literal_value,
|
||||
text, visit_named_nodes,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{AuthorizationModel, CallSite, Framework};
|
||||
|
|
@ -26,24 +26,21 @@ impl AuthExtractor for EchoExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
let mut groups = HashMap::new();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
visit_named_nodes(root, &mut |node| match node.kind() {
|
||||
"short_var_declaration" | "assignment_statement" => {
|
||||
maybe_collect_group_binding(node, bytes, &mut groups)
|
||||
}
|
||||
"call_expression" => {
|
||||
maybe_collect_group_use(node, bytes, &mut groups);
|
||||
maybe_collect_route(root, node, bytes, path, rules, &groups, &mut model);
|
||||
maybe_collect_route(root, node, bytes, path, rules, &groups, model);
|
||||
}
|
||||
_ => {}
|
||||
});
|
||||
|
||||
model
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
attach_route_handler, call_site_from_node, collect_top_level_units, http_method_from_name,
|
||||
is_handler_reference, member_target, named_children, push_route_registration,
|
||||
string_literal_value, visit_named_nodes,
|
||||
attach_route_handler, call_site_from_node, http_method_from_name, is_handler_reference,
|
||||
member_target, named_children, push_route_registration, string_literal_value,
|
||||
visit_named_nodes,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{AuthorizationModel, Framework};
|
||||
|
|
@ -25,18 +25,14 @@ impl AuthExtractor for ExpressExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
visit_named_nodes(root, &mut |node| {
|
||||
if node.kind() == "call_expression" {
|
||||
maybe_collect_route(root, node, bytes, path, rules, &mut model);
|
||||
maybe_collect_route(root, node, bytes, path, rules, model);
|
||||
}
|
||||
});
|
||||
|
||||
model
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
attach_route_handler, call_sites_from_value, collect_top_level_units, http_method_from_name,
|
||||
is_handler_reference, member_target, named_children, object_property_value,
|
||||
push_route_registration, string_literal_value, visit_named_nodes,
|
||||
attach_route_handler, call_sites_from_value, http_method_from_name, is_handler_reference,
|
||||
member_target, named_children, object_property_value, push_route_registration,
|
||||
string_literal_value, visit_named_nodes,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{AuthorizationModel, CallSite, Framework};
|
||||
|
|
@ -25,19 +25,15 @@ impl AuthExtractor for FastifyExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
visit_named_nodes(root, &mut |node| {
|
||||
if node.kind() == "call_expression" {
|
||||
maybe_collect_shorthand_route(root, node, bytes, path, rules, &mut model);
|
||||
maybe_collect_route_object(root, node, bytes, path, rules, &mut model);
|
||||
maybe_collect_shorthand_route(root, node, bytes, path, rules, model);
|
||||
maybe_collect_route_object(root, node, bytes, path, rules, model);
|
||||
}
|
||||
});
|
||||
|
||||
model
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,15 +4,27 @@ use super::common::{
|
|||
push_route_registration, string_literal_value, text, visit_named_nodes,
|
||||
};
|
||||
use crate::auth_analysis::config::{AuthAnalysisRules, matches_name};
|
||||
use crate::auth_analysis::extract::common::{collect_top_level_units, decorated_definition_child};
|
||||
use crate::auth_analysis::model::{AuthorizationModel, CallSite, Framework, HttpMethod};
|
||||
use crate::auth_analysis::extract::common::decorated_definition_child;
|
||||
use crate::auth_analysis::model::{
|
||||
AuthCheck, AuthCheckKind, AuthorizationModel, CallSite, Framework, HttpMethod,
|
||||
};
|
||||
use crate::labels::bare_method_name;
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
pub struct FlaskExtractor;
|
||||
|
||||
/// Map from a module-level router/app variable name to the
|
||||
/// `dependencies=[...]` deps declared on its constructor call. FastAPI
|
||||
/// propagates these to every route attached via
|
||||
/// `@<router>.<verb>(...)`, so the route extractor must merge them in
|
||||
/// before running ownership / membership checks. Each entry follows
|
||||
/// the same shape as `extract_fastapi_dependencies` produces:
|
||||
/// `(CallSite, is_scoped_security)`. See `collect_router_level_dependencies`.
|
||||
type RouterLevelDepMap = HashMap<String, Vec<(CallSite, bool)>>;
|
||||
|
||||
impl AuthExtractor for FlaskExtractor {
|
||||
fn supports(&self, lang: &str, framework_ctx: Option<&FrameworkContext>) -> bool {
|
||||
lang == "python"
|
||||
|
|
@ -26,18 +38,52 @@ impl AuthExtractor for FlaskExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
// Pass 1: pre-walk for module-level router/app assignments
|
||||
// (`ti_id_router = VersionedAPIRouter(dependencies=[Security(...)])`).
|
||||
// FastAPI applies router-level deps to every attached route, so
|
||||
// every per-route `@<router>.<verb>(...)` decorator must merge
|
||||
// the router's deps before the ownership check fires. Without
|
||||
// this, airflow's execution-API routes that re-use a single
|
||||
// `ti_id_router` declared once at module scope inherit no auth
|
||||
// and flag `missing_ownership_check` despite being authorized.
|
||||
let mut router_deps = collect_router_level_dependencies(root, bytes);
|
||||
// Merge in cross-file router-deps lifted via
|
||||
// `<parent>.include_router(<this_file>.<router>, ...)` calls in
|
||||
// other project files — pre-resolved by the orchestrator at
|
||||
// pass 2 entry from `GlobalSummaries.router_facts_by_module`.
|
||||
// Cross-file deps are PREPENDED to mirror FastAPI's runtime
|
||||
// ordering (parent router deps run before any in-file router
|
||||
// deps and before per-route deps). Empty when global summaries
|
||||
// are unavailable (single-file scan / unit-test paths).
|
||||
if !model.cross_file_router_deps.is_empty() {
|
||||
for (router_var, cross_deps) in &model.cross_file_router_deps {
|
||||
if cross_deps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let entry = router_deps.entry(router_var.clone()).or_default();
|
||||
let mut merged: Vec<(CallSite, bool)> = cross_deps.clone();
|
||||
// Dedup so an inline `dependencies=[Security(...)]` and a
|
||||
// cross-file lift of the same `Security(callee)` don't
|
||||
// double-fire downstream auth checks.
|
||||
for dep in entry.iter() {
|
||||
let already = merged
|
||||
.iter()
|
||||
.any(|(call, scoped)| call.name == dep.0.name && *scoped == dep.1);
|
||||
if !already {
|
||||
merged.push(dep.clone());
|
||||
}
|
||||
}
|
||||
*entry = merged;
|
||||
}
|
||||
}
|
||||
visit_named_nodes(root, &mut |node| {
|
||||
if node.kind() == "decorated_definition" {
|
||||
maybe_collect_flask_route(root, node, bytes, path, rules, &mut model);
|
||||
maybe_collect_flask_route(root, node, bytes, path, rules, model, &router_deps);
|
||||
}
|
||||
});
|
||||
|
||||
model
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -54,6 +100,7 @@ fn maybe_collect_flask_route(
|
|||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
model: &mut AuthorizationModel,
|
||||
router_deps: &RouterLevelDepMap,
|
||||
) {
|
||||
let Some(definition) = decorated_definition_child(node) else {
|
||||
return;
|
||||
|
|
@ -63,21 +110,44 @@ fn maybe_collect_flask_route(
|
|||
}
|
||||
|
||||
let mut route_specs = Vec::new();
|
||||
let mut middleware_calls = Vec::new();
|
||||
let mut middleware_calls: Vec<(CallSite, bool)> = Vec::new();
|
||||
for decorator in decorator_expressions(node) {
|
||||
if let Some(mut specs) = parse_flask_route_decorator(decorator, bytes) {
|
||||
route_specs.append(&mut specs);
|
||||
// FastAPI propagates router-level `dependencies=[...]` from
|
||||
// `<router> = APIRouter(...)` to every attached
|
||||
// `@<router>.<verb>(...)` route. Look up the decorator's
|
||||
// router prefix in the pre-built map and merge its deps
|
||||
// BEFORE the route-level deps so the ordering matches
|
||||
// FastAPI runtime semantics (router deps run before route
|
||||
// deps). Without this, airflow execution-API routes that
|
||||
// declare auth once at the router level fire spurious
|
||||
// `missing_ownership_check` / `token_override` findings.
|
||||
if let Some(prefix) = router_prefix_from_decorator(decorator, bytes)
|
||||
&& let Some(deps) = router_deps.get(&prefix)
|
||||
{
|
||||
middleware_calls.extend(deps.iter().cloned());
|
||||
}
|
||||
// FastAPI puts route-level dependencies (auth checks +
|
||||
// logging hooks) inside the route decorator's
|
||||
// `dependencies=[Depends(...)]` keyword argument, instead
|
||||
// of as separate `@decorator` lines like Flask. Walk the
|
||||
// route decorator's keyword args for that shape and lift
|
||||
// each `Depends(call(...))` element into the
|
||||
// middleware_calls list, so the same `inject_middleware_auth`
|
||||
// path that Flask uses also picks up FastAPI auth deps.
|
||||
// each `Depends(call(...))` / `Security(call, scopes=[...])`
|
||||
// element into the middleware_calls list, so the same
|
||||
// `inject_middleware_auth` path that Flask uses also
|
||||
// picks up FastAPI auth deps. The boolean tracks whether
|
||||
// the wrapper was a scoped `Security(...)` — those are
|
||||
// OAuth2-scope-checked authorization (not just login),
|
||||
// so the AuthCheckKind is promoted in
|
||||
// `inject_middleware_auth`.
|
||||
middleware_calls.extend(extract_fastapi_dependencies(decorator, bytes));
|
||||
} else {
|
||||
middleware_calls.extend(expand_decorator_calls(decorator, bytes));
|
||||
middleware_calls.extend(
|
||||
expand_decorator_calls(decorator, bytes)
|
||||
.into_iter()
|
||||
.map(|c| (c, false)),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -104,6 +174,10 @@ fn maybe_collect_flask_route(
|
|||
rules,
|
||||
);
|
||||
|
||||
let registration_calls: Vec<CallSite> = middleware_calls
|
||||
.iter()
|
||||
.map(|(call, _)| call.clone())
|
||||
.collect();
|
||||
push_route_registration(
|
||||
model,
|
||||
Framework::Flask,
|
||||
|
|
@ -111,7 +185,7 @@ fn maybe_collect_flask_route(
|
|||
spec.path,
|
||||
path,
|
||||
handler,
|
||||
middleware_calls.clone(),
|
||||
registration_calls,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -272,19 +346,25 @@ fn expand_decorator_calls(node: Node<'_>, bytes: &[u8]) -> Vec<CallSite> {
|
|||
}
|
||||
|
||||
/// Walk the route-decorator call's keyword args looking for the FastAPI
|
||||
/// `dependencies=[Depends(call(...)), Depends(call), ...]` shape. For
|
||||
/// each `Depends(...)` list element, extract the inner callable as a
|
||||
/// `CallSite` so it can flow through `inject_middleware_auth` and be
|
||||
/// matched against the per-language authorization-check / login-guard
|
||||
/// name lists. Refuses non-call elements and `Depends(...)` without a
|
||||
/// recognised inner call shape.
|
||||
/// `dependencies=[Depends(call(...)), Security(call, scopes=[...]), ...]`
|
||||
/// shape. For each `Depends(...)` / `Security(...)` list element,
|
||||
/// extract the inner callable as a `CallSite` so it can flow through
|
||||
/// `inject_middleware_auth` and be matched against the per-language
|
||||
/// authorization-check / login-guard name lists. Refuses non-call
|
||||
/// elements and markers without a recognised inner call shape.
|
||||
///
|
||||
/// Returns `(CallSite, is_scoped_security)` pairs. The boolean is
|
||||
/// `true` when the wrapper was `Security(...)` carrying a non-empty
|
||||
/// `scopes=[...]` kwarg — those are OAuth2-scope-checked authorization
|
||||
/// (FastAPI semantics), not bare login dependency, so
|
||||
/// `inject_middleware_auth` promotes the `AuthCheckKind`.
|
||||
///
|
||||
/// The function is decoupled from Flask semantics (Flask routes never
|
||||
/// use `dependencies=`); the lookup is purely structural and matches
|
||||
/// FastAPI's documented dependency-injection convention. Lives in the
|
||||
/// flask module because Flask's route-decorator parser already targets
|
||||
/// the `@<router>.<method>(<path>, ...)` shape that FastAPI shares.
|
||||
fn extract_fastapi_dependencies(decorator_expr: Node<'_>, bytes: &[u8]) -> Vec<CallSite> {
|
||||
fn extract_fastapi_dependencies(decorator_expr: Node<'_>, bytes: &[u8]) -> Vec<(CallSite, bool)> {
|
||||
if decorator_expr.kind() != "call" {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -296,47 +376,232 @@ fn extract_fastapi_dependencies(decorator_expr: Node<'_>, bytes: &[u8]) -> Vec<C
|
|||
};
|
||||
let mut out = Vec::new();
|
||||
for element in named_children(value) {
|
||||
if let Some(call) = unwrap_depends_call(element, bytes) {
|
||||
out.push(call);
|
||||
if let Some(unwrapped) = unwrap_depends_call(element, bytes) {
|
||||
out.push(unwrapped);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Unwrap one `Depends(...)` list element from a FastAPI `dependencies`
|
||||
/// list and return the inner callable as a `CallSite`. Three shapes
|
||||
/// are accepted:
|
||||
/// Walk the module root for top-level assignments of the form
|
||||
/// `<router> = <RouterCtor>(..., dependencies=[Depends(...), Security(...)])`
|
||||
/// and build a map from the router variable name to its router-level
|
||||
/// dependency CallSites. FastAPI applies these to every attached
|
||||
/// `@<router>.<verb>(...)` route at runtime — the per-route extractor
|
||||
/// merges them in before running ownership / membership checks.
|
||||
///
|
||||
/// Recognised router/app constructors (callee-tail-name match, so
|
||||
/// `fastapi.APIRouter(...)` and `routing.APIRouter(...)` both work):
|
||||
/// * `APIRouter` (FastAPI canonical)
|
||||
/// * `FastAPI` (FastAPI app object — `dependencies=[...]` on the app
|
||||
/// applies to every route under it)
|
||||
/// * `VersionedAPIRouter` (airflow-specific subclass)
|
||||
/// * Any callee whose tail name ends with `Router` — covers
|
||||
/// project-specific `APIRouter` subclasses without the airflow-
|
||||
/// specific allowlist needing to grow per-codebase. Conservative:
|
||||
/// the lookup only ever fires when the route decorator's prefix
|
||||
/// matches a captured variable, so over-matching the constructor
|
||||
/// doesn't produce false auth attribution unless the same name is
|
||||
/// also used as a route decorator's receiver — extremely rare.
|
||||
///
|
||||
/// The walk is restricted to module-root expression statements / typed
|
||||
/// assignments — nested function-local routers aren't supported (and
|
||||
/// don't appear in real-world FastAPI codebases — the router pattern is
|
||||
/// always module-scoped so it can be imported into the app at startup).
|
||||
fn collect_router_level_dependencies(root: Node<'_>, bytes: &[u8]) -> RouterLevelDepMap {
|
||||
let mut out: RouterLevelDepMap = HashMap::new();
|
||||
for child in named_children(root) {
|
||||
// Top-level shape: `expression_statement` wrapping an
|
||||
// `assignment` (Python tree-sitter convention). Also accept
|
||||
// bare `assignment` in case the grammar changes.
|
||||
let assign = match child.kind() {
|
||||
"expression_statement" => named_children(child).into_iter().next(),
|
||||
"assignment" => Some(child),
|
||||
_ => None,
|
||||
};
|
||||
let Some(assign) = assign else { continue };
|
||||
if assign.kind() != "assignment" {
|
||||
continue;
|
||||
}
|
||||
let Some(left) = assign.child_by_field_name("left") else {
|
||||
continue;
|
||||
};
|
||||
if left.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(right) = assign.child_by_field_name("right") else {
|
||||
continue;
|
||||
};
|
||||
if right.kind() != "call" {
|
||||
continue;
|
||||
}
|
||||
let Some(function) = right.child_by_field_name("function") else {
|
||||
continue;
|
||||
};
|
||||
let function_text = text(function, bytes);
|
||||
if !is_router_like_constructor(&function_text) {
|
||||
continue;
|
||||
}
|
||||
let Some(arguments) = right.child_by_field_name("arguments") else {
|
||||
continue;
|
||||
};
|
||||
let Some(deps_value) = keyword_argument_value(arguments, bytes, "dependencies") else {
|
||||
continue;
|
||||
};
|
||||
let mut deps = Vec::new();
|
||||
for element in named_children(deps_value) {
|
||||
if let Some(unwrapped) = unwrap_depends_call(element, bytes) {
|
||||
deps.push(unwrapped);
|
||||
}
|
||||
}
|
||||
if deps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let var_name = text(left, bytes).trim().to_string();
|
||||
if var_name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// First declaration wins. A `<router> = …` re-assignment
|
||||
// would be unusual at module scope; if it happens, the first
|
||||
// dependency declaration is conservatively the one that
|
||||
// applies to most routes attached after it.
|
||||
out.entry(var_name).or_insert(deps);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Decide whether `callee` names a FastAPI router / application constructor.
///
/// Only the last dotted segment is inspected, so `fastapi.APIRouter`,
/// `routing.APIRouter` and bare `APIRouter` are treated alike. Accepted
/// segments are:
///
/// * the exact names `APIRouter`, `FastAPI` and `VersionedAPIRouter`
///   (airflow's subclass);
/// * any `<Something>Router` name whose first character is an ASCII
///   uppercase letter, which covers project-specific subclasses without an
///   exhaustive allowlist.
///
/// A bare `Router`, empty text and lowercase-leading names are rejected so
/// arbitrary identifiers do not match. Surrounding whitespace in `callee` is
/// ignored.
fn is_router_like_constructor(callee: &str) -> bool {
    let name = callee.trim();
    // `.` is a single byte, so slicing just past it is always a char boundary.
    let last_segment = match name.rfind('.') {
        Some(dot) => &name[dot + 1..],
        None => name,
    };

    match last_segment {
        "APIRouter" | "FastAPI" | "VersionedAPIRouter" => true,
        // `*Router` suffix rule: the part before `Router` must be non-empty
        // and start with an uppercase ASCII letter.
        other => other
            .strip_suffix("Router")
            .is_some_and(|stem| stem.starts_with(|c: char| c.is_ascii_uppercase())),
    }
}
|
||||
|
||||
/// Extract the router-receiver identifier from a route-decorator call
|
||||
/// node. Decorator shape: `@<router>.<verb>(<path>, ...)` — the
|
||||
/// callee is `<router>.<verb>`, so the prefix is everything before the
|
||||
/// last `.`. Returns `None` for decorators that don't match the
|
||||
/// expected `attribute`-style shape (e.g. bare `@requires_auth` or
|
||||
/// `@blueprint.route("/x")` where the attribute is the verb itself).
|
||||
fn router_prefix_from_decorator(decorator_expr: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||||
if decorator_expr.kind() != "call" {
|
||||
return None;
|
||||
}
|
||||
let function = decorator_expr.child_by_field_name("function")?;
|
||||
if function.kind() != "attribute" {
|
||||
return None;
|
||||
}
|
||||
let object = function.child_by_field_name("object")?;
|
||||
if !matches!(object.kind(), "identifier" | "attribute") {
|
||||
return None;
|
||||
}
|
||||
let prefix = text(object, bytes).trim().to_string();
|
||||
if prefix.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(prefix)
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwrap one `Depends(...)` / `Security(...)` list element from a
|
||||
/// FastAPI `dependencies` list and return the inner callable as a
|
||||
/// `CallSite`. Four shapes are accepted:
|
||||
/// * `Depends(callee(arg1, arg2))`, most common, the inner call is
|
||||
/// the callable factory invocation; record `callee` as the auth
|
||||
/// check.
|
||||
/// * `Depends(callee)`, bare reference; record `callee` itself.
|
||||
/// * `Depends()` / non-`Depends` items, skipped.
|
||||
fn unwrap_depends_call(node: Node<'_>, bytes: &[u8]) -> Option<CallSite> {
|
||||
/// * `Security(callee, scopes=[...])`, FastAPI's OAuth2-scope
|
||||
/// variant of `Depends`; the first positional arg is the auth
|
||||
/// callable, the `scopes=` kwarg is ignored. Real-world airflow
|
||||
/// execution-API routes use this form
|
||||
/// (`task_instances.py:104`).
|
||||
/// * `Depends()` / non-marker items, skipped.
|
||||
///
|
||||
/// Skips `keyword_argument` children when locating the first
|
||||
/// positional, so kwargs ordering (`Security(scopes=..., callee)`)
|
||||
/// does not hide the dependency.
|
||||
fn unwrap_depends_call(node: Node<'_>, bytes: &[u8]) -> Option<(CallSite, bool)> {
|
||||
if node.kind() != "call" {
|
||||
return None;
|
||||
}
|
||||
let function = node.child_by_field_name("function")?;
|
||||
let function_text = text(function, bytes);
|
||||
if !is_depends_callee(&function_text) {
|
||||
if !is_dep_marker_callee(&function_text) {
|
||||
return None;
|
||||
}
|
||||
let is_security = is_security_marker(&function_text);
|
||||
let arguments = node.child_by_field_name("arguments")?;
|
||||
let first = named_children(arguments).into_iter().next()?;
|
||||
let children = named_children(arguments);
|
||||
let first = children
|
||||
.iter()
|
||||
.copied()
|
||||
.find(|child| child.kind() != "keyword_argument")?;
|
||||
let scoped_security = is_security
|
||||
&& keyword_argument_value(arguments, bytes, "scopes")
|
||||
.map(|value| {
|
||||
named_children(value)
|
||||
.iter()
|
||||
.any(|item| item.kind() != "comment")
|
||||
})
|
||||
.unwrap_or(false);
|
||||
match first.kind() {
|
||||
"call" => Some(call_site_from_node(first, bytes)),
|
||||
"identifier" | "attribute" | "scoped_identifier" => Some(call_site_from_node(first, bytes)),
|
||||
"call" => Some((call_site_from_node(first, bytes), scoped_security)),
|
||||
"identifier" | "attribute" | "scoped_identifier" => {
|
||||
Some((call_site_from_node(first, bytes), scoped_security))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Subset of `is_dep_marker_callee` that matches only the `Security`
/// variant (and its fully-qualified forms). `Security(callable,
/// scopes=[...])` is FastAPI's OAuth2-scope-checked dependency: the
/// inner callable is invoked with the merged `SecurityScopes` from
/// every parent `Security(...)` declaration, and the route is
/// rejected unless the bearer token carries one of the requested
/// scopes. Treating a scoped Security wrapper as authorization
/// (not just login) is the deeper semantic encoded by
/// `inject_middleware_auth`.
///
/// Matching is literal: surrounding whitespace is trimmed, but no alias
/// resolution or canonicalisation is attempted, so `Depends`,
/// `Security.something` and the empty string all return `false`.
fn is_security_marker(callee: &str) -> bool {
    let trimmed = callee.trim();
    matches!(
        trimmed,
        "Security" | "fastapi.Security" | "fastapi.params.Security"
    )
}
|
||||
|
||||
/// Report whether `callee` is one of FastAPI's dependency markers.
///
/// Both `Depends` and `Security` are recognised, each in bare,
/// `fastapi.`-qualified and `fastapi.params.`-qualified form.
/// `Security(callable, scopes=[...])` is the OAuth2-scope variant of
/// `Depends(callable)`; FastAPI treats the inner callable identically for
/// dependency injection, so both wrappers are unwrapped the same way.
///
/// Deliberately conservative: surrounding whitespace is trimmed, but only
/// the literal spellings below match. No alias resolution is attempted.
fn is_dep_marker_callee(callee: &str) -> bool {
    const DEPENDENCY_MARKERS: [&str; 6] = [
        "Depends",
        "fastapi.Depends",
        "fastapi.params.Depends",
        "Security",
        "fastapi.Security",
        "fastapi.params.Security",
    ];

    DEPENDENCY_MARKERS.contains(&callee.trim())
}
|
||||
|
||||
|
|
@ -344,31 +609,108 @@ fn inject_middleware_auth(
|
|||
model: &mut AuthorizationModel,
|
||||
unit_idx: usize,
|
||||
line: usize,
|
||||
middleware_calls: &[CallSite],
|
||||
middleware_calls: &[(CallSite, bool)],
|
||||
rules: &AuthAnalysisRules,
|
||||
) {
|
||||
let Some(unit) = model.units.get_mut(unit_idx) else {
|
||||
return;
|
||||
};
|
||||
for call in middleware_calls {
|
||||
if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
|
||||
// Mark as route-level: the check is declared at the route
|
||||
// boundary (Flask `@requires_role(...)` decorator, FastAPI
|
||||
// `dependencies=[Depends(...)]`, or any custom-router
|
||||
// equivalent) and semantically authorizes every value the
|
||||
// handler receives, path param, body, query, downstream
|
||||
// row fetches, the lot. `auth_check_covers_subject` reads
|
||||
// `is_route_level` and short-circuits `true` for any
|
||||
// non-login-guard match, which is the correct shape for a
|
||||
// decorator-level guard whose inner call carries no
|
||||
// per-arg subject ref pointing back into the handler body.
|
||||
// LoginGuard / TokenExpiry / TokenRecipient kinds are
|
||||
// already excluded by `has_prior_subject_auth`'s filter
|
||||
// before they reach `auth_check_covers_subject`, so the
|
||||
// flag is safe to set unconditionally here, it has no
|
||||
// effect on those kinds.
|
||||
check.is_route_level = true;
|
||||
unit.auth_checks.push(check);
|
||||
for (call, scoped_security) in middleware_calls {
|
||||
let mut check = match auth_check_from_call_site(call, line, rules) {
|
||||
Some(check) => check,
|
||||
None if *scoped_security => {
|
||||
// FastAPI `Security(callable, scopes=[...])` always
|
||||
// enforces authorization at the route boundary even
|
||||
// when `callable` doesn't appear in any per-language
|
||||
// login-guard / authorization-check name list. Synthesise
|
||||
// an `Other`-kind check so the route is recognised as
|
||||
// guarded; without this, every `Security(custom_dep,
|
||||
// scopes=[...])` route fires `missing_ownership_check`
|
||||
// FPs.
|
||||
AuthCheck {
|
||||
kind: AuthCheckKind::Other,
|
||||
callee: call.name.clone(),
|
||||
subjects: Vec::new(),
|
||||
span: call.span,
|
||||
line,
|
||||
args: call.args.clone(),
|
||||
condition_text: None,
|
||||
is_route_level: false,
|
||||
}
|
||||
}
|
||||
None => continue,
|
||||
};
|
||||
// Mark as route-level: the check is declared at the route
|
||||
// boundary (Flask `@requires_role(...)` decorator, FastAPI
|
||||
// `dependencies=[Depends(...)]`, or any custom-router
|
||||
// equivalent) and semantically authorizes every value the
|
||||
// handler receives, path param, body, query, downstream
|
||||
// row fetches, the lot. `auth_check_covers_subject` reads
|
||||
// `is_route_level` and short-circuits `true` for any
|
||||
// non-login-guard match, which is the correct shape for a
|
||||
// decorator-level guard whose inner call carries no
|
||||
// per-arg subject ref pointing back into the handler body.
|
||||
// LoginGuard / TokenExpiry / TokenRecipient kinds are
|
||||
// already excluded by `has_prior_subject_auth`'s filter
|
||||
// before they reach `auth_check_covers_subject`, so the
|
||||
// flag is safe to set unconditionally here, it has no
|
||||
// effect on those kinds.
|
||||
check.is_route_level = true;
|
||||
// FastAPI `Security(callable, scopes=[...])` is OAuth2-scope-
|
||||
// checked authorization (the JWT must carry one of the listed
|
||||
// scopes); a `LoginGuard` classification would be wrong because
|
||||
// `has_prior_subject_auth` filters LoginGuard out. Promote to
|
||||
// `Other` so the route counts as authorized for ownership /
|
||||
// membership / token-override checks.
|
||||
if *scoped_security
|
||||
&& matches!(
|
||||
check.kind,
|
||||
AuthCheckKind::LoginGuard
|
||||
| AuthCheckKind::TokenExpiry
|
||||
| AuthCheckKind::TokenRecipient
|
||||
)
|
||||
{
|
||||
check.kind = AuthCheckKind::Other;
|
||||
}
|
||||
let push_token_synth = *scoped_security;
|
||||
unit.auth_checks.push(check);
|
||||
if push_token_synth {
|
||||
// FastAPI `Security(callable, scopes=[...])` validates the
|
||||
// bearer JWT in two ways: signature verification (which
|
||||
// includes expiry — a JWT past its `exp` claim fails the
|
||||
// signature path) and scope checking (the requested scopes
|
||||
// identify what the bearer is authorized to act on, which
|
||||
// semantically encodes recipient binding for the route).
|
||||
// Synthesise the matching `TokenExpiry` + `TokenRecipient`
|
||||
// checks so the `token_override_without_validation` rule
|
||||
// recognises the JWT-validated route. Without this,
|
||||
// every FastAPI route declaring scoped Security at the
|
||||
// route or router boundary fires token-override FPs on
|
||||
// its `session.add` / `Model.save()` calls — the
|
||||
// missing_ownership_check sibling of the same finding is
|
||||
// already cleared by the kind-promotion above. Empty- or
|
||||
// missing-scopes Security wrappers fall through this gate
|
||||
// (scoped_security is false) and remain bare login deps.
|
||||
unit.auth_checks.push(AuthCheck {
|
||||
kind: AuthCheckKind::TokenExpiry,
|
||||
callee: call.name.clone(),
|
||||
subjects: Vec::new(),
|
||||
span: call.span,
|
||||
line,
|
||||
args: call.args.clone(),
|
||||
condition_text: None,
|
||||
is_route_level: true,
|
||||
});
|
||||
unit.auth_checks.push(AuthCheck {
|
||||
kind: AuthCheckKind::TokenRecipient,
|
||||
callee: call.name.clone(),
|
||||
subjects: Vec::new(),
|
||||
span: call.span,
|
||||
line,
|
||||
args: call.args.clone(),
|
||||
condition_text: None,
|
||||
is_route_level: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -410,24 +752,318 @@ mod test_decorator_tests {
|
|||
|
||||
#[cfg(test)]
|
||||
mod fastapi_dependencies_tests {
|
||||
use super::is_depends_callee;
|
||||
use super::{is_dep_marker_callee, is_security_marker, unwrap_depends_call};
|
||||
use tree_sitter::Parser;
|
||||
|
||||
/// `is_depends_callee` only matches the FastAPI `Depends` marker.
|
||||
/// Any other wrapper call inside `dependencies=[...]` is ignored ,
|
||||
/// extracting an inner callee from the wrong wrapper would
|
||||
/// misclassify logging hooks or filter callables as auth checks.
|
||||
fn parse_python(source: &str) -> tree_sitter::Tree {
|
||||
let mut parser = Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE))
|
||||
.expect("python language");
|
||||
parser.parse(source, None).expect("parse")
|
||||
}
|
||||
|
||||
/// Walk the parsed tree to find the first `call` node whose
|
||||
/// callee name matches `marker`. Helper for the `unwrap_depends_call`
|
||||
/// regression tests below — the production extractor traverses the
|
||||
/// route-decorator's `dependencies=[...]` list and feeds each
|
||||
/// element into `unwrap_depends_call`, so the test mirrors that
|
||||
/// element shape directly without the surrounding boilerplate.
|
||||
fn find_first_marker_call<'a>(
|
||||
node: tree_sitter::Node<'a>,
|
||||
bytes: &[u8],
|
||||
marker: &str,
|
||||
) -> Option<tree_sitter::Node<'a>> {
|
||||
if node.kind() == "call"
|
||||
&& let Some(function) = node.child_by_field_name("function")
|
||||
&& function.utf8_text(bytes).unwrap_or("") == marker
|
||||
{
|
||||
return Some(node);
|
||||
}
|
||||
for idx in 0..node.named_child_count() {
|
||||
if let Some(child) = node.named_child(idx as u32)
|
||||
&& let Some(found) = find_first_marker_call(child, bytes, marker)
|
||||
{
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// `is_dep_marker_callee` matches only FastAPI's `Depends` /
|
||||
/// `Security` markers. Any other wrapper call inside
|
||||
/// `dependencies=[...]` is ignored, extracting an inner callee
|
||||
/// from the wrong wrapper would misclassify logging hooks or
|
||||
/// filter callables as auth checks.
|
||||
#[test]
|
||||
fn is_depends_callee_recognises_canonical_forms() {
|
||||
assert!(is_depends_callee("Depends"));
|
||||
assert!(is_depends_callee("fastapi.Depends"));
|
||||
assert!(is_depends_callee("fastapi.params.Depends"));
|
||||
fn is_dep_marker_callee_recognises_canonical_forms() {
|
||||
assert!(is_dep_marker_callee("Depends"));
|
||||
assert!(is_dep_marker_callee("fastapi.Depends"));
|
||||
assert!(is_dep_marker_callee("fastapi.params.Depends"));
|
||||
// Security variant — OAuth2-scope-bearing equivalent.
|
||||
assert!(is_dep_marker_callee("Security"));
|
||||
assert!(is_dep_marker_callee("fastapi.Security"));
|
||||
assert!(is_dep_marker_callee("fastapi.params.Security"));
|
||||
// Whitespace tolerance.
|
||||
assert!(is_depends_callee(" Depends "));
|
||||
assert!(is_dep_marker_callee(" Depends "));
|
||||
assert!(is_dep_marker_callee(" Security "));
|
||||
// Negatives.
|
||||
assert!(!is_depends_callee("Annotated"));
|
||||
assert!(!is_depends_callee("Body"));
|
||||
assert!(!is_depends_callee("Depends.something"));
|
||||
assert!(!is_depends_callee("RequiresAuth"));
|
||||
assert!(!is_depends_callee(""));
|
||||
assert!(!is_dep_marker_callee("Annotated"));
|
||||
assert!(!is_dep_marker_callee("Body"));
|
||||
assert!(!is_dep_marker_callee("Depends.something"));
|
||||
assert!(!is_dep_marker_callee("Security.something"));
|
||||
assert!(!is_dep_marker_callee("RequiresAuth"));
|
||||
assert!(!is_dep_marker_callee(""));
|
||||
}
|
||||
|
||||
/// `is_security_marker` is the strictly-Security subset. Used to
|
||||
/// promote the wrapper's `is_scoped_security` flag without a
|
||||
/// second string-match pass.
|
||||
#[test]
|
||||
fn is_security_marker_recognises_security_only() {
|
||||
assert!(is_security_marker("Security"));
|
||||
assert!(is_security_marker("fastapi.Security"));
|
||||
assert!(is_security_marker("fastapi.params.Security"));
|
||||
assert!(is_security_marker(" Security "));
|
||||
// Depends is NOT a Security marker.
|
||||
assert!(!is_security_marker("Depends"));
|
||||
assert!(!is_security_marker("fastapi.Depends"));
|
||||
assert!(!is_security_marker("Annotated"));
|
||||
assert!(!is_security_marker(""));
|
||||
}
|
||||
|
||||
/// `Security(callable, scopes=[...])` — the canonical airflow
|
||||
/// execution-API auth-dep shape (`task_instances.py:104`). Must
|
||||
/// extract `callable` as the inner CallSite AND flag the wrapper as
|
||||
/// scoped-security so `inject_middleware_auth` promotes the
|
||||
/// AuthCheckKind from LoginGuard to Other (OAuth2 scopes are
|
||||
/// authorization, not just login). Without the promotion, the
|
||||
/// route still fires `missing_ownership_check` despite carrying a
|
||||
/// declared route-level dependency.
|
||||
#[test]
|
||||
fn unwrap_depends_call_security_with_scopes_flags_scoped() {
|
||||
let src = "x = Security(require_auth, scopes=[\"token:execution\"])\n";
|
||||
let tree = parse_python(src);
|
||||
let bytes = src.as_bytes();
|
||||
let call = find_first_marker_call(tree.root_node(), bytes, "Security")
|
||||
.expect("Security call node");
|
||||
let (site, scoped) = unwrap_depends_call(call, bytes).expect("Security recognised");
|
||||
assert_eq!(site.name, "require_auth");
|
||||
assert!(
|
||||
scoped,
|
||||
"non-empty scopes=[...] must mark the wrapper scoped"
|
||||
);
|
||||
}
|
||||
|
||||
/// `Depends(callable())` — pre-existing FastAPI shape. Inner call
|
||||
/// extracts to the factory's outer name; wrapper is NOT
|
||||
/// scoped-security. Regression guard: the Security extension must
|
||||
/// not flip Depends's scoped flag on.
|
||||
#[test]
|
||||
fn unwrap_depends_call_depends_factory_not_scoped() {
|
||||
let src = "x = Depends(requires_access_dag(method=\"GET\"))\n";
|
||||
let tree = parse_python(src);
|
||||
let bytes = src.as_bytes();
|
||||
let call =
|
||||
find_first_marker_call(tree.root_node(), bytes, "Depends").expect("Depends call node");
|
||||
let (site, scoped) = unwrap_depends_call(call, bytes).expect("Depends recognised");
|
||||
assert_eq!(site.name, "requires_access_dag");
|
||||
assert!(!scoped, "Depends wrapper never scoped-security");
|
||||
}
|
||||
|
||||
/// `Security(callable)` without scopes (rare but legal) is NOT
|
||||
/// scoped — the OAuth2-scope semantic only fires when scopes is
|
||||
/// non-empty, so the wrapper falls back to the regular login-guard
|
||||
/// classification. Conservative: don't over-promote.
|
||||
#[test]
|
||||
fn unwrap_depends_call_security_without_scopes_not_scoped() {
|
||||
let src = "x = Security(require_auth)\n";
|
||||
let tree = parse_python(src);
|
||||
let bytes = src.as_bytes();
|
||||
let call = find_first_marker_call(tree.root_node(), bytes, "Security")
|
||||
.expect("Security call node");
|
||||
let (site, scoped) = unwrap_depends_call(call, bytes).expect("Security recognised");
|
||||
assert_eq!(site.name, "require_auth");
|
||||
assert!(
|
||||
!scoped,
|
||||
"missing scopes=[...] kwarg means not scoped-security"
|
||||
);
|
||||
}
|
||||
|
||||
/// `Security(callable, scopes=[])` with an empty scope list is NOT
|
||||
/// scoped-security: an empty `scopes=[]` declaration accumulates
|
||||
/// no required scopes onto the JWT check, so the route is
|
||||
/// effectively a bare login dependency. Conservative — keeps the
|
||||
/// promotion gate tight.
|
||||
#[test]
|
||||
fn unwrap_depends_call_security_empty_scopes_not_scoped() {
|
||||
let src = "x = Security(require_auth, scopes=[])\n";
|
||||
let tree = parse_python(src);
|
||||
let bytes = src.as_bytes();
|
||||
let call = find_first_marker_call(tree.root_node(), bytes, "Security")
|
||||
.expect("Security call node");
|
||||
let (site, scoped) = unwrap_depends_call(call, bytes).expect("Security recognised");
|
||||
assert_eq!(site.name, "require_auth");
|
||||
assert!(!scoped, "scopes=[] is not scoped-security");
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod router_level_dependencies_tests {
    use super::{
        collect_router_level_dependencies, is_router_like_constructor, router_prefix_from_decorator,
    };
    use tree_sitter::Parser;

    /// Build a Python syntax tree for `source`; panics (failing the test)
    /// if the grammar cannot be loaded or parsing yields no tree.
    fn parse_python(source: &str) -> tree_sitter::Tree {
        let language = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
        let mut python_parser = Parser::new();
        python_parser
            .set_language(&language)
            .expect("python language");
        python_parser.parse(source, None).expect("parse")
    }

    /// The predicate looks at the tail name only: `fastapi.APIRouter`,
    /// `routing.APIRouter`, bare `APIRouter`, and airflow's
    /// `VersionedAPIRouter` all match, and the `*Router` suffix rule picks
    /// up project-specific subclasses. Lowercase or non-router identifiers
    /// must be rejected.
    #[test]
    fn is_router_like_constructor_matches_canonical_names() {
        let accepted = [
            // Canonical FastAPI.
            "APIRouter",
            "FastAPI",
            "fastapi.APIRouter",
            "fastapi.routing.APIRouter",
            "fastapi.FastAPI",
            // Airflow.
            "VersionedAPIRouter",
            // Project-specific *Router subclasses.
            "CustomRouter",
            "api.v1.MyRouter",
        ];
        for callee in accepted {
            assert!(is_router_like_constructor(callee));
        }

        let rejected = [
            "router",
            "Annotated",
            "Depends",
            "Security",
            "",
            // `Router` on its own is too generic for the suffix rule and is
            // not on the explicit allowlist either.
            "Router",
            // Lowercase-leading name: the suffix rule demands an uppercase
            // first character so generic verbs are not caught.
            "flat_router",
        ];
        for callee in rejected {
            assert!(!is_router_like_constructor(callee));
        }
    }

    /// Airflow's real-repo shape: `ti_id_router = VersionedAPIRouter(
    /// route_class=..., dependencies=[Security(require_auth,
    /// scopes=["ti:self"])])`. The collector must key the dependency by
    /// `ti_id_router` and report the wrapper as scoped-security, which is
    /// what lets `inject_middleware_auth` promote the check kind to Other.
    #[test]
    fn collect_router_level_dependencies_picks_up_versioned_apirouter_security() {
        let source = "ti_id_router = VersionedAPIRouter(\n route_class=ExecutionAPIRoute,\n dependencies=[\n Security(require_auth, scopes=[\"ti:self\"]),\n ],\n)\n";
        let parsed = parse_python(source);
        let collected = collect_router_level_dependencies(parsed.root_node(), source.as_bytes());

        let router_deps = collected
            .get("ti_id_router")
            .expect("ti_id_router router-level deps captured");
        assert_eq!(router_deps.len(), 1);

        let (call_site, is_scoped) = &router_deps[0];
        assert_eq!(call_site.name, "require_auth");
        assert!(*is_scoped, "scopes=[\"ti:self\"] must mark scoped-security");
    }

    /// A plain `Depends(...)` router-level dependency is captured but never
    /// flagged scoped-security; counterpart of the per-route Depends test
    /// in the sibling `fastapi_dependencies_tests` module.
    #[test]
    fn collect_router_level_dependencies_picks_up_apirouter_depends_not_scoped() {
        let source = "v1 = APIRouter(\n prefix=\"/v1\",\n dependencies=[Depends(get_current_user)],\n)\n";
        let parsed = parse_python(source);
        let collected = collect_router_level_dependencies(parsed.root_node(), source.as_bytes());

        let router_deps = collected.get("v1").expect("v1 router-level deps captured");
        assert_eq!(router_deps.len(), 1);

        let (call_site, is_scoped) = &router_deps[0];
        assert_eq!(call_site.name, "get_current_user");
        assert!(!*is_scoped, "Depends never scoped-security");
    }

    /// A router built without a `dependencies=` kwarg gets no map entry at
    /// all. An empty list would merge as a silent no-op downstream, but an
    /// absent key is the cleaner signal.
    #[test]
    fn collect_router_level_dependencies_skips_routers_without_deps() {
        let source = "router = APIRouter(prefix=\"/x\")\n";
        let parsed = parse_python(source);
        let collected = collect_router_level_dependencies(parsed.root_node(), source.as_bytes());
        assert!(collected.get("router").is_none());
    }

    /// `MyService(...)` is neither on the explicit allowlist nor a
    /// `*Router` name, so its coincidental `dependencies=` kwarg must not
    /// land in the router-dep map.
    #[test]
    fn collect_router_level_dependencies_skips_non_router_constructors() {
        let source = "svc = MyService(dependencies=[Depends(get_db)])\n";
        let parsed = parse_python(source);
        let collected = collect_router_level_dependencies(parsed.root_node(), source.as_bytes());
        assert!(collected.get("svc").is_none());
    }

    /// Depth-first search for the first `decorator` node, returning its
    /// first named child (the decorator expression) so
    /// `router_prefix_from_decorator` can be exercised in isolation.
    /// Counterpart of `find_first_marker_call` in the sibling test module.
    fn find_first_decorator<'a>(node: tree_sitter::Node<'a>) -> Option<tree_sitter::Node<'a>> {
        if node.kind() == "decorator" {
            if let Some(expression) = node.named_child(0) {
                return Some(expression);
            }
        }
        (0..node.named_child_count())
            .filter_map(|idx| node.named_child(idx as u32))
            .find_map(|child| find_first_decorator(child))
    }

    /// `@ti_id_router.patch("/x")` yields the prefix `"ti_id_router"`, the
    /// key the per-route extractor uses to look up router-level deps.
    #[test]
    fn router_prefix_from_decorator_extracts_simple_identifier() {
        let source = "@ti_id_router.patch(\"/x\")\ndef f():\n pass\n";
        let parsed = parse_python(source);
        let decorator_expr =
            find_first_decorator(parsed.root_node()).expect("decorator call node");
        let prefix = router_prefix_from_decorator(decorator_expr, source.as_bytes())
            .expect("prefix extracted");
        assert_eq!(prefix, "ti_id_router");
    }

    /// A bare-identifier decorator (`@requires_auth`) has no router prefix
    /// to look up, so the extractor returns `None`.
    #[test]
    fn router_prefix_from_decorator_returns_none_for_bare_decorator() {
        let source = "@requires_auth\ndef f():\n pass\n";
        let parsed = parse_python(source);
        let decorator_expr = find_first_decorator(parsed.root_node()).expect("decorator node");
        // The decorator's child is an `identifier`, not a `call`, so the
        // extractor bails out at its very first kind check.
        assert!(router_prefix_from_decorator(decorator_expr, source.as_bytes()).is_none());
    }
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
attach_route_handler, call_site_from_node, collect_top_level_units, http_method_from_name,
|
||||
is_handler_reference, join_route_paths, member_target, named_children, push_route_registration,
|
||||
string_literal_value, text, visit_named_nodes,
|
||||
attach_route_handler, call_site_from_node, http_method_from_name, is_handler_reference,
|
||||
join_route_paths, member_target, named_children, push_route_registration, string_literal_value,
|
||||
text, visit_named_nodes,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{AuthorizationModel, CallSite, Framework};
|
||||
|
|
@ -26,24 +26,21 @@ impl AuthExtractor for GinExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
let mut groups = HashMap::new();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
visit_named_nodes(root, &mut |node| match node.kind() {
|
||||
"short_var_declaration" | "assignment_statement" => {
|
||||
maybe_collect_group_binding(node, bytes, &mut groups)
|
||||
}
|
||||
"call_expression" => {
|
||||
maybe_collect_group_use(node, bytes, &mut groups);
|
||||
maybe_collect_route(root, node, bytes, path, rules, &groups, &mut model);
|
||||
maybe_collect_route(root, node, bytes, path, rules, &groups, model);
|
||||
}
|
||||
_ => {}
|
||||
});
|
||||
|
||||
model
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
attach_route_handler, call_site_from_node, collect_top_level_units, http_method_from_name,
|
||||
is_handler_reference, member_target, named_children, push_route_registration,
|
||||
string_literal_value, visit_named_nodes,
|
||||
attach_route_handler, call_site_from_node, http_method_from_name, is_handler_reference,
|
||||
member_target, named_children, push_route_registration, string_literal_value,
|
||||
visit_named_nodes,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{AuthorizationModel, Framework};
|
||||
|
|
@ -25,18 +25,14 @@ impl AuthExtractor for KoaExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
visit_named_nodes(root, &mut |node| {
|
||||
if node.kind() == "call_expression" {
|
||||
maybe_collect_route(root, node, bytes, path, rules, &mut model);
|
||||
maybe_collect_route(root, node, bytes, path, rules, model);
|
||||
}
|
||||
});
|
||||
|
||||
model
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use super::config::AuthAnalysisRules;
|
||||
use super::model::AuthorizationModel;
|
||||
use super::model::{AuthorizationModel, CallSite};
|
||||
use crate::utils::project::{FrameworkContext, rust_file_imports_web_framework};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tree_sitter::Tree;
|
||||
|
||||
|
|
@ -21,13 +22,26 @@ pub mod spring;
|
|||
|
||||
pub trait AuthExtractor {
|
||||
fn supports(&self, lang: &str, framework_ctx: Option<&FrameworkContext>) -> bool;
|
||||
|
||||
/// Returns true when this extractor expects the orchestrator to
|
||||
/// have already populated `model.units` with one
|
||||
/// `AnalysisUnitKind::Function` entry per top-level function /
|
||||
/// method via [`common::collect_top_level_units`]. Defaults to
|
||||
/// `true`; framework extractors that build their own unit set
|
||||
/// (Spring, Rails) override to `false` so the orchestrator skips
|
||||
/// the shared collection pass when only those extractors match.
|
||||
fn requires_top_level_units(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn extract(
|
||||
&self,
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel;
|
||||
model: &mut AuthorizationModel,
|
||||
);
|
||||
}
|
||||
|
||||
pub fn extract_authorization_model(
|
||||
|
|
@ -37,6 +51,7 @@ pub fn extract_authorization_model(
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
cross_file_router_deps: Option<&HashMap<String, Vec<(CallSite, bool)>>>,
|
||||
) -> AuthorizationModel {
|
||||
let extractors: [&dyn AuthExtractor; 13] = [
|
||||
&express::ExpressExtractor,
|
||||
|
|
@ -57,14 +72,47 @@ pub fn extract_authorization_model(
|
|||
lang: lang.to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
// Pre-populate the cross-file router-dep map BEFORE extractors run.
|
||||
// FlaskExtractor reads `model.cross_file_router_deps` and merges the
|
||||
// resolved deps into its local router-deps map at extraction time,
|
||||
// so per-route auth attribution sees both the local-file
|
||||
// `dependencies=[Security(...)]` declarations and the cross-file
|
||||
// lift from `<parent>.include_router(<this_file>.<router>, ...)`
|
||||
// edges visible elsewhere in the project. Empty / `None` for every
|
||||
// non-Python language and for files with no matching child edges.
|
||||
if let Some(deps) = cross_file_router_deps {
|
||||
model.cross_file_router_deps = deps.clone();
|
||||
}
|
||||
|
||||
// **Hoist `collect_top_level_units` out of the per-extractor loop.**
|
||||
// For multi-extractor languages (Go: gin+echo, JS/TS: express+koa+
|
||||
// fastify, Python: flask+django, Rust: axum+actix_web+rocket, Ruby:
|
||||
// sinatra) the legacy code re-walked the entire AST and rebuilt the
|
||||
// `Function`-kind unit set per extractor (then deduped by span).
|
||||
// `collect_top_level_units` was the dominant cost in
|
||||
// `extract_authorization_model` (46% of total wall-clock on the
|
||||
// mattermost/server/channels/app subtree, 2026-05-04 profile).
|
||||
//
|
||||
// After the hoist each extractor receives a `&mut model` that
|
||||
// already carries the shared unit set; framework-specific work
|
||||
// (route detection, middleware injection, typed-extractor guards)
|
||||
// augments and promotes those units in place via the existing
|
||||
// `attach_route_handler` "promote-or-create" path.
|
||||
//
|
||||
// Spring + Rails build their own unit set (`maybe_collect_controller`
|
||||
// / Rails' `collect_nodes`), so they opt out via
|
||||
// `requires_top_level_units = false`; the shared pass runs only
|
||||
// when at least one matching extractor needs it.
|
||||
let any_requires_units = extractors
|
||||
.iter()
|
||||
.any(|e| e.supports(lang, framework_ctx) && e.requires_top_level_units());
|
||||
if any_requires_units {
|
||||
common::collect_top_level_units(tree.root_node(), bytes, rules, &mut model);
|
||||
}
|
||||
|
||||
for extractor in extractors {
|
||||
if extractor.supports(lang, framework_ctx) {
|
||||
let mut other = extractor.extract(tree, bytes, path, rules);
|
||||
// Preserve the canonical `lang` set above; sub-extractors
|
||||
// build their own default-initialised models with empty lang.
|
||||
other.lang = model.lang.clone();
|
||||
model.extend(other);
|
||||
extractor.extract(tree, bytes, path, rules, &mut model);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,17 +22,24 @@ impl AuthExtractor for RailsExtractor {
|
|||
.is_none_or(|ctx| ctx.frameworks.is_empty() || ctx.has(DetectedFramework::Rails))
|
||||
}
|
||||
|
||||
fn requires_top_level_units(&self) -> bool {
|
||||
// Rails builds its own RouteHandler unit set inside `collect_nodes`
|
||||
// (controller actions inferred from `routes.rb` resource entries
|
||||
// and conventional `resources :foo` mappings). It never relies on
|
||||
// the orchestrator's shared `collect_top_level_units` pass.
|
||||
false
|
||||
}
|
||||
|
||||
fn extract(
|
||||
&self,
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
collect_nodes(root, &[], bytes, path, rules, &mut model);
|
||||
model
|
||||
collect_nodes(root, &[], bytes, path, rules, model);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,7 @@ use super::axum::{
|
|||
rust_param_aliases,
|
||||
};
|
||||
use super::common::{
|
||||
attach_route_handler, collect_top_level_units, function_definition_node, function_name,
|
||||
named_children, text,
|
||||
attach_route_handler, function_definition_node, function_name, named_children, text,
|
||||
};
|
||||
use crate::auth_analysis::config::AuthAnalysisRules;
|
||||
use crate::auth_analysis::model::{AuthorizationModel, Framework, HttpMethod, RouteRegistration};
|
||||
|
|
@ -28,14 +27,10 @@ impl AuthExtractor for RocketExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
collect_handlers(root, root, bytes, path, rules, &mut model);
|
||||
|
||||
model
|
||||
collect_handlers(root, root, bytes, path, rules, model);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use super::AuthExtractor;
|
||||
use super::common::{
|
||||
auth_check_from_call_site, build_function_unit, call_name, call_site_from_node,
|
||||
collect_top_level_units, named_children, span, string_literal_value,
|
||||
auth_check_from_call_site, build_function_unit, call_name, call_site_from_node, named_children,
|
||||
span, string_literal_value,
|
||||
};
|
||||
use crate::auth_analysis::config::{AuthAnalysisRules, matches_name};
|
||||
use crate::auth_analysis::model::{
|
||||
|
|
@ -27,13 +27,11 @@ impl AuthExtractor for SinatraExtractor {
|
|||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
let before_filters = collect_before_filters(root, bytes);
|
||||
collect_routes(root, bytes, path, rules, &before_filters, &mut model);
|
||||
model
|
||||
collect_routes(root, bytes, path, rules, &before_filters, model);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,19 +20,27 @@ impl AuthExtractor for SpringExtractor {
|
|||
.is_none_or(|ctx| ctx.frameworks.is_empty() || ctx.has(DetectedFramework::Spring))
|
||||
}
|
||||
|
||||
fn requires_top_level_units(&self) -> bool {
|
||||
// Spring synthesises its own units inside `maybe_collect_controller`
|
||||
// (only `@Controller` / `@RestController`-annotated classes
|
||||
// produce units; non-controller Java files contribute nothing).
|
||||
// The orchestrator's shared `collect_top_level_units` pass would
|
||||
// emit a `Function` unit per top-level method on every Java file
|
||||
// including non-controller helpers, doubling work and broadening
|
||||
// the analysis surface beyond what Spring needs.
|
||||
false
|
||||
}
|
||||
|
||||
fn extract(
|
||||
&self,
|
||||
tree: &Tree,
|
||||
bytes: &[u8],
|
||||
path: &Path,
|
||||
rules: &AuthAnalysisRules,
|
||||
) -> AuthorizationModel {
|
||||
model: &mut AuthorizationModel,
|
||||
) {
|
||||
let root = tree.root_node();
|
||||
let mut model = AuthorizationModel::default();
|
||||
|
||||
collect_classes(root, bytes, path, rules, &mut model);
|
||||
|
||||
model
|
||||
collect_classes(root, bytes, path, rules, model);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ pub mod checks;
|
|||
pub mod config;
|
||||
pub mod extract;
|
||||
pub mod model;
|
||||
pub mod router_facts;
|
||||
pub mod sql_semantics;
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
|
|
@ -102,21 +103,98 @@ pub fn run_auth_analysis(
|
|||
if !rules.enabled {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut model = extract::extract_authorization_model(
|
||||
// Resolve cross-file router-deps for the active file (Python only)
|
||||
// before constructing the model, so the FlaskExtractor sees the
|
||||
// full per-file dep map at extraction time. See `router_facts`
|
||||
// module + `analyse_file_fused` for the wider pipeline.
|
||||
let cross_file_router_deps =
|
||||
resolve_cross_file_router_deps_for_file(lang, file_path, global_summaries);
|
||||
let model = extract::extract_authorization_model(
|
||||
lang,
|
||||
cfg.framework_ctx.as_ref(),
|
||||
tree,
|
||||
source,
|
||||
file_path,
|
||||
&rules,
|
||||
cross_file_router_deps.as_ref(),
|
||||
);
|
||||
run_auth_analysis_with_model(
|
||||
model,
|
||||
tree,
|
||||
lang,
|
||||
file_path,
|
||||
&rules,
|
||||
var_types,
|
||||
global_summaries,
|
||||
scan_root,
|
||||
)
|
||||
}
|
||||
|
||||
/// Look up `GlobalSummaries.router_facts_by_module` and resolve the
|
||||
/// cross-file router-deps map for the file at `file_path`. Returns
|
||||
/// `None` for non-Python files, files whose module_id has no matching
|
||||
/// `<parent>.include_router(<this_file>.<var>, ...)` edges anywhere in
|
||||
/// the project, or callers that don't pass `global_summaries`.
|
||||
pub(crate) fn resolve_cross_file_router_deps_for_file(
|
||||
lang: &str,
|
||||
file_path: &Path,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
) -> Option<HashMap<String, Vec<(model::CallSite, bool)>>> {
|
||||
if lang != "python" {
|
||||
return None;
|
||||
}
|
||||
let gs = global_summaries?;
|
||||
let module_id = router_facts::module_id_for_path(file_path)?;
|
||||
let resolved = gs.resolve_cross_file_router_deps(&module_id);
|
||||
if resolved.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(resolved)
|
||||
}
|
||||
}
|
||||
|
||||
/// Variant of [`run_auth_analysis`] that accepts a pre-built
|
||||
/// [`model::AuthorizationModel`] instead of building one from the AST.
|
||||
///
|
||||
/// Lets callers that need both diagnostics AND
|
||||
/// `(FuncKey, AuthCheckSummary)` per-file summaries (the fused pass-2
|
||||
/// path in [`crate::ast::analyse_file_fused`]) construct the base
|
||||
/// authorization model exactly once and route both consumers through
|
||||
/// it. Pre-fix the fused path called
|
||||
/// [`extract::extract_authorization_model`] twice per file (once via
|
||||
/// [`run_auth_analysis`], once via [`extract_auth_summaries_by_key`]),
|
||||
/// duplicating the AST walks for `collect_top_level_units` +
|
||||
/// `build_function_unit_with_meta` + `collect_unit_state` + every
|
||||
/// extractor's framework-detection scan. On the
|
||||
/// `mattermost/server/channels/app` profile that double-extract
|
||||
/// accounted for 35.3% of total wall-clock; sharing the base model
|
||||
/// drops it to ~17.6%.
|
||||
///
|
||||
/// The mutations applied here (`apply_var_types_to_model`,
|
||||
/// `apply_typed_bounded_params`, `apply_helper_lifting`) only
|
||||
/// affect diagnostic emission — `extract_auth_summaries_from_model`
|
||||
/// reads the **base** model so callers must extract summaries before
|
||||
/// passing the model in.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn run_auth_analysis_with_model(
|
||||
mut model: model::AuthorizationModel,
|
||||
tree: &Tree,
|
||||
lang: &str,
|
||||
file_path: &Path,
|
||||
rules: &config::AuthAnalysisRules,
|
||||
var_types: Option<&VarTypes>,
|
||||
global_summaries: Option<&GlobalSummaries>,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<Diag> {
|
||||
if !rules.enabled {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Refine `SensitiveOperation::sink_class` using SSA-derived
|
||||
// variable types. Runs only when the caller supplied `var_types`
|
||||
// (skipped for slug-lookup / unit-test call sites).
|
||||
if let Some(types) = var_types {
|
||||
apply_var_types_to_model(&mut model, &rules, types);
|
||||
apply_var_types_to_model(&mut model, rules, types);
|
||||
apply_typed_bounded_params(&mut model, types);
|
||||
}
|
||||
|
||||
|
|
@ -128,11 +206,16 @@ pub fn run_auth_analysis(
|
|||
// (when provided) for cross-file helpers that live in other files.
|
||||
apply_helper_lifting(&mut model, lang, file_path, scan_root, global_summaries);
|
||||
|
||||
// Phase 1 caller-scope IPA: propagate route-handler-level auth
|
||||
// checks DOWN to callee helper units within the same file. See
|
||||
// [`apply_caller_scope_propagation`] for the propagation rule.
|
||||
apply_caller_scope_propagation(&mut model);
|
||||
|
||||
if model.routes.is_empty() && model.units.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
checks::run_checks(&model, &rules)
|
||||
checks::run_checks(&model, rules)
|
||||
.into_iter()
|
||||
.map(|finding| auth_finding_to_diag(&finding, tree, file_path))
|
||||
.collect()
|
||||
|
|
@ -167,8 +250,28 @@ pub fn extract_auth_summaries_by_key(
|
|||
source,
|
||||
file_path,
|
||||
&rules,
|
||||
None,
|
||||
);
|
||||
summaries_keyed_by_func(&model, lang, file_path, scan_root)
|
||||
extract_auth_summaries_from_model(&model, lang, file_path, scan_root)
|
||||
}
|
||||
|
||||
/// Variant of [`extract_auth_summaries_by_key`] that consumes a
|
||||
/// pre-built [`model::AuthorizationModel`].
|
||||
///
|
||||
/// Designed for callers that also need to run the diagnostic pipeline
|
||||
/// (which mutates the model via [`run_auth_analysis_with_model`]):
|
||||
/// extract summaries first against the base model, then hand the same
|
||||
/// model to the diag pipeline so the second
|
||||
/// [`extract::extract_authorization_model`] AST walk per file is
|
||||
/// avoided. See [`run_auth_analysis_with_model`] for the wider
|
||||
/// rationale and measured saving.
|
||||
pub fn extract_auth_summaries_from_model(
|
||||
model: &model::AuthorizationModel,
|
||||
lang: &str,
|
||||
file_path: &Path,
|
||||
scan_root: Option<&Path>,
|
||||
) -> Vec<(FuncKey, model::AuthCheckSummary)> {
|
||||
summaries_keyed_by_func(model, lang, file_path, scan_root)
|
||||
}
|
||||
|
||||
/// Convert an already-built [`model::AuthorizationModel`] into a
|
||||
|
|
@ -444,6 +547,203 @@ fn apply_helper_lifting(
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 1 caller-scope IPA: propagate route-handler-level auth checks
|
||||
/// DOWN to callee helper units within the same file.
|
||||
///
|
||||
/// `apply_helper_lifting` walks UPWARD: a helper that internally
|
||||
/// proves ownership / membership / etc. has its summary lifted onto
|
||||
/// each call site in the caller. But the inverse direction —
|
||||
/// route handler that authenticates via route-level decorator/
|
||||
/// dependency, then delegates to a private helper that performs the
|
||||
/// actual sink — is the dominant FP shape on FastAPI / Django / Flask
|
||||
/// codebases (sentry, saleor, airflow): the helper has no inline
|
||||
/// auth_checks of its own, so `check_ownership_gaps` flags every
|
||||
/// `session.add(...)` / `Model.objects.filter(id=...)` it contains.
|
||||
///
|
||||
/// This pass closes that gap inside a single file. For each helper
|
||||
/// unit, if **every** same-file caller (across the whole call graph)
|
||||
/// is itself an authorized route handler (route-level non-Login auth
|
||||
/// check) or has already been authorized via this same propagation
|
||||
/// in a prior round, lift the caller's route-level checks onto the
|
||||
/// helper. Iterated to a small fixpoint so transitive helper chains
|
||||
/// `route → mid_helper → leaf_helper` are also covered.
|
||||
///
|
||||
/// Synthetic checks carry `is_route_level=true` so
|
||||
/// `auth_check_covers_subject` short-circuits coverage for any
|
||||
/// subject the helper sees, mirroring the in-handler decorator-lift
|
||||
/// semantics established by [`extract::flask::inject_middleware_auth`].
|
||||
///
|
||||
/// **Soundness rule**: a helper's `unit_callers` list must be
|
||||
/// non-empty AND every caller must be authorized. This refuses to
|
||||
/// authorize:
|
||||
/// * helpers with no in-file caller (dead code or external
|
||||
/// entry-point — could be CLI, cron, test harness, …),
|
||||
/// * helpers called from a mix of authorized routes and unauthorized
|
||||
/// callers (the unauthorized path is the real attack surface),
|
||||
/// * helpers called only from another un-lifted helper (no
|
||||
/// evidence the upstream chain authenticates).
|
||||
///
|
||||
/// Cross-file caller-scope IPA — where the route handler lives in
|
||||
/// file A and the helper in file B — is not yet implemented.
|
||||
/// Requires plumbing per-file caller auth checks through
|
||||
/// `GlobalSummaries`, not just the existing per-callee
|
||||
/// `AuthCheckSummary`. See `deep_engine_fixes.md` for the deferred
|
||||
/// follow-up.
|
||||
fn apply_caller_scope_propagation(model: &mut model::AuthorizationModel) {
|
||||
use model::{AnalysisUnitKind, AuthCheck, AuthCheckKind};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
// Build leaf-name → unit_idx map. Only non-route-handler units are
|
||||
// lift TARGETS; route handlers don't need downward lift since they
|
||||
// already carry their own route-level auth.
|
||||
let mut leaf_to_unit: HashMap<String, usize> = HashMap::new();
|
||||
for (idx, unit) in model.units.iter().enumerate() {
|
||||
if unit.kind == AnalysisUnitKind::RouteHandler {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = unit.name.as_deref() else {
|
||||
continue;
|
||||
};
|
||||
let leaf = name.rsplit('.').next().unwrap_or(name);
|
||||
if leaf.is_empty() {
|
||||
continue;
|
||||
}
|
||||
leaf_to_unit.entry(leaf.to_string()).or_insert(idx);
|
||||
}
|
||||
|
||||
// For each callee unit, collect its same-file caller indices.
|
||||
// Iterates every unit's `call_sites` once; a callee with no
|
||||
// matching unit (calls into stdlib, framework, third-party) gets
|
||||
// an empty `unit_callers[i]` and is excluded from propagation
|
||||
// below.
|
||||
let mut unit_callers: Vec<Vec<usize>> = vec![Vec::new(); model.units.len()];
|
||||
for (caller_idx, unit) in model.units.iter().enumerate() {
|
||||
let mut seen_callees: HashSet<usize> = HashSet::new();
|
||||
for call in &unit.call_sites {
|
||||
let leaf = call.name.rsplit('.').next().unwrap_or(&call.name);
|
||||
if let Some(&callee_idx) = leaf_to_unit.get(leaf)
|
||||
&& callee_idx != caller_idx
|
||||
&& seen_callees.insert(callee_idx)
|
||||
{
|
||||
unit_callers[callee_idx].push(caller_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Seed `authorized` only when a unit carries at least one
|
||||
// route-level Other / Membership / Ownership / AdminGuard check.
|
||||
// `LoginGuard` alone proves only identity, not authority, and
|
||||
// `TokenExpiry` / `TokenRecipient` alone don't justify
|
||||
// foreign-id mutations — `has_prior_subject_auth` already filters
|
||||
// those kinds out. Seeding on those would silently authorize
|
||||
// helpers reachable from a login-only route.
|
||||
let is_seed_kind = |k: AuthCheckKind| {
|
||||
!matches!(
|
||||
k,
|
||||
AuthCheckKind::LoginGuard | AuthCheckKind::TokenExpiry | AuthCheckKind::TokenRecipient
|
||||
)
|
||||
};
|
||||
let mut authorized: HashSet<usize> = (0..model.units.len())
|
||||
.filter(|i| {
|
||||
model.units[*i]
|
||||
.auth_checks
|
||||
.iter()
|
||||
.any(|c| c.is_route_level && is_seed_kind(c.kind))
|
||||
})
|
||||
.collect();
|
||||
// Lift ALL route-level non-Login auth checks once a unit is
|
||||
// authorized, including `TokenExpiry` / `TokenRecipient`. Those
|
||||
// kinds are required by `check_token_override_without_validation`
|
||||
// (which gates separately from `has_prior_subject_auth`); without
|
||||
// them the callee fires `token_override_without_validation` even
|
||||
// after `missing_ownership_check` is suppressed. `LoginGuard` is
|
||||
// still excluded — it's too weak to count as a coverage proof for
|
||||
// either downstream check.
|
||||
let unit_route_level_checks: Vec<Vec<AuthCheck>> = model
|
||||
.units
|
||||
.iter()
|
||||
.map(|unit| {
|
||||
unit.auth_checks
|
||||
.iter()
|
||||
.filter(|c| c.is_route_level && c.kind != AuthCheckKind::LoginGuard)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Per-callee aggregated lift checks, populated as we authorize.
|
||||
// Stored separately so we can apply mutations after the fixpoint
|
||||
// loop without invalidating immutable borrows above.
|
||||
let mut helper_lift: HashMap<usize, Vec<AuthCheck>> = HashMap::new();
|
||||
|
||||
const MAX_ROUNDS: usize = 4;
|
||||
for _ in 0..MAX_ROUNDS {
|
||||
let mut grew = false;
|
||||
for (callee_idx, callers) in unit_callers.iter().enumerate().take(model.units.len()) {
|
||||
if authorized.contains(&callee_idx) {
|
||||
continue;
|
||||
}
|
||||
if callers.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if !callers.iter().all(|c| authorized.contains(c)) {
|
||||
continue;
|
||||
}
|
||||
// Aggregate the route-level checks from every authorized
|
||||
// caller. Non-route-handler callers contribute nothing
|
||||
// (their `unit_route_level_checks[c]` is empty by
|
||||
// construction) — only route handlers up the chain seed
|
||||
// real route-level checks, and downstream helpers
|
||||
// propagate those forward via the `is_route_level=true`
|
||||
// flag on the synthetic checks.
|
||||
let mut chosen: Vec<AuthCheck> = Vec::new();
|
||||
for &caller_idx in callers {
|
||||
for check in &unit_route_level_checks[caller_idx] {
|
||||
chosen.push(check.clone());
|
||||
}
|
||||
if let Some(prior) = helper_lift.get(&caller_idx) {
|
||||
for check in prior {
|
||||
chosen.push(check.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
if chosen.is_empty() {
|
||||
continue;
|
||||
}
|
||||
authorized.insert(callee_idx);
|
||||
helper_lift.insert(callee_idx, chosen);
|
||||
grew = true;
|
||||
}
|
||||
if !grew {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (callee_idx, checks) in helper_lift {
|
||||
let unit = &mut model.units[callee_idx];
|
||||
let mut existing_keys: HashSet<((usize, usize), AuthCheckKind, String)> = unit
|
||||
.auth_checks
|
||||
.iter()
|
||||
.map(|c| (c.span, c.kind, c.callee.clone()))
|
||||
.collect();
|
||||
for check in checks {
|
||||
let mut synth = check;
|
||||
// Re-anchor at the callee's start line so the
|
||||
// `check.line <= op.line` gate in `has_prior_subject_auth`
|
||||
// covers every operation inside the callee. Without this
|
||||
// re-anchor, the synthetic check carries the caller's line
|
||||
// (which is greater than the callee's body lines) and
|
||||
// doesn't gate any of the callee's sinks.
|
||||
synth.line = unit.line;
|
||||
synth.callee = format!("(caller-scope lift {})", synth.callee);
|
||||
let key = (synth.span, synth.kind, synth.callee.clone());
|
||||
if existing_keys.insert(key) {
|
||||
unit.auth_checks.push(synth);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a `name → AuthCheckSummary` map by walking each unit's auth
|
||||
/// checks and recording, for every check subject whose value-ref name
|
||||
/// matches a positional parameter name of the unit, that param index
|
||||
|
|
@ -742,11 +1042,14 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: &
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{VarTypes, apply_var_types_to_model, receiver_root, sink_class_for_type};
|
||||
use super::{
|
||||
VarTypes, apply_caller_scope_propagation, apply_var_types_to_model, receiver_root,
|
||||
sink_class_for_type,
|
||||
};
|
||||
use crate::auth_analysis::config::build_auth_rules;
|
||||
use crate::auth_analysis::model::{
|
||||
AnalysisUnit, AnalysisUnitKind, AuthorizationModel, OperationKind, SensitiveOperation,
|
||||
SinkClass,
|
||||
AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite,
|
||||
OperationKind, SensitiveOperation, SinkClass,
|
||||
};
|
||||
use crate::ssa::type_facts::TypeKind;
|
||||
use crate::utils::config::Config;
|
||||
|
|
@ -868,6 +1171,239 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Build a synthetic [`AnalysisUnit`] with the given kind, name,
|
||||
/// and call_site leaf names. No operations or auth_checks; tests
|
||||
/// add those explicitly.
|
||||
fn unit_with_calls(kind: AnalysisUnitKind, name: &str, callees: &[&str]) -> AnalysisUnit {
|
||||
AnalysisUnit {
|
||||
kind,
|
||||
name: Some(name.into()),
|
||||
span: (0, 0),
|
||||
params: Vec::new(),
|
||||
context_inputs: Vec::new(),
|
||||
call_sites: callees
|
||||
.iter()
|
||||
.map(|c| CallSite {
|
||||
name: (*c).to_string(),
|
||||
args: Vec::new(),
|
||||
span: (0, 0),
|
||||
args_value_refs: Vec::new(),
|
||||
})
|
||||
.collect(),
|
||||
auth_checks: Vec::new(),
|
||||
operations: Vec::new(),
|
||||
value_refs: Vec::new(),
|
||||
condition_texts: Vec::new(),
|
||||
line: 1,
|
||||
row_field_vars: HashMap::new(),
|
||||
var_alias_chain: HashMap::new(),
|
||||
row_population_data: HashMap::new(),
|
||||
self_actor_vars: HashSet::new(),
|
||||
self_actor_id_vars: HashSet::new(),
|
||||
authorized_sql_vars: HashSet::new(),
|
||||
const_bound_vars: HashSet::new(),
|
||||
typed_bounded_vars: HashSet::new(),
|
||||
typed_bounded_dto_fields: HashMap::new(),
|
||||
self_scoped_session_bases: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn route_level_check(kind: AuthCheckKind) -> AuthCheck {
|
||||
AuthCheck {
|
||||
kind,
|
||||
callee: "Security(require_auth)".into(),
|
||||
subjects: Vec::new(),
|
||||
span: (10, 11),
|
||||
line: 1,
|
||||
args: Vec::new(),
|
||||
condition_text: None,
|
||||
is_route_level: true,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn caller_scope_propagation_lifts_route_level_other_to_callee_helper() {
    // Airflow-shaped fixture: the route handler `ti_update_state` carries
    // route-level Other + TokenExpiry + TokenRecipient checks and calls
    // the helper `_create_state_update`, which has no checks of its own.
    let mut handler = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_update_state",
        &["_create_state_update"],
    );
    for kind in [
        AuthCheckKind::Other,
        AuthCheckKind::TokenExpiry,
        AuthCheckKind::TokenRecipient,
    ] {
        handler.auth_checks.push(route_level_check(kind));
    }
    let helper = unit_with_calls(AnalysisUnitKind::Function, "_create_state_update", &[]);

    let mut model = AuthorizationModel::default();
    model.units.push(handler);
    model.units.push(helper);

    apply_caller_scope_propagation(&mut model);

    // All three kinds must now be present on the helper.
    let helper = &model.units[1];
    let kinds: HashSet<AuthCheckKind> = helper
        .auth_checks
        .iter()
        .map(|check| check.kind)
        .collect();
    let expectations = [
        (
            AuthCheckKind::Other,
            "helper should inherit Other check from caller",
        ),
        (
            AuthCheckKind::TokenExpiry,
            "helper should inherit TokenExpiry check (needed for token_override suppression)",
        ),
        (
            AuthCheckKind::TokenRecipient,
            "helper should inherit TokenRecipient check",
        ),
    ];
    for (kind, message) in expectations {
        assert!(kinds.contains(&kind), "{message}");
    }

    // Every lifted check keeps the route-level flag and is re-anchored at
    // the helper's own start line.
    assert!(
        helper.auth_checks.iter().all(|check| check.is_route_level),
        "lifted checks must keep is_route_level=true"
    );
    assert!(
        helper
            .auth_checks
            .iter()
            .all(|check| check.line == helper.line),
        "lifted check.line must match callee unit start so check.line <= op.line holds"
    );
}
|
||||
|
||||
#[test]
fn caller_scope_propagation_refuses_when_helper_has_unauthorized_caller() {
    // Two route handlers call the same helper: one carries a route-level
    // Other check, the other carries no auth at all. A single
    // unauthorized caller is enough to block the lift.
    let helper_name = "_create_state_update";

    let mut authed = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_update_state",
        &[helper_name],
    );
    authed
        .auth_checks
        .push(route_level_check(AuthCheckKind::Other));

    let bare = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_overwrite_state",
        &[helper_name],
    );
    let helper = unit_with_calls(AnalysisUnitKind::Function, helper_name, &[]);

    let mut model = AuthorizationModel::default();
    for unit in [authed, bare, helper] {
        model.units.push(unit);
    }

    apply_caller_scope_propagation(&mut model);

    // The helper is the third unit pushed and must be left untouched.
    let helper_checks = &model.units[2].auth_checks;
    assert!(
        helper_checks.is_empty(),
        "helper must not be authorized when one caller has no route-level auth"
    );
}
|
||||
|
||||
#[test]
fn caller_scope_propagation_refuses_when_helper_has_no_callers() {
    // A helper that nothing in the model calls: there is no caller to
    // inherit from, so propagation must leave it without any checks.
    let orphan = unit_with_calls(AnalysisUnitKind::Function, "_orphan_helper", &[]);
    let mut model = AuthorizationModel::default();
    model.units.push(orphan);

    apply_caller_scope_propagation(&mut model);

    let orphan_checks = &model.units[0].auth_checks;
    assert!(
        orphan_checks.is_empty(),
        "helper with no in-file callers must not be authorized"
    );
}
|
||||
|
||||
#[test]
fn caller_scope_propagation_transitive_chain_route_to_mid_to_leaf() {
    // Chain: route -> _mid_helper -> _leaf_helper. The route's check is
    // lifted onto mid, and leaf is then lifted via the already-authorized
    // mid; both helpers must end up carrying the Other check.
    let mut handler = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_update_state",
        &["_mid_helper"],
    );
    handler
        .auth_checks
        .push(route_level_check(AuthCheckKind::Other));
    let mid = unit_with_calls(AnalysisUnitKind::Function, "_mid_helper", &["_leaf_helper"]);
    let leaf = unit_with_calls(AnalysisUnitKind::Function, "_leaf_helper", &[]);

    let mut model = AuthorizationModel::default();
    for unit in [handler, mid, leaf] {
        model.units.push(unit);
    }

    apply_caller_scope_propagation(&mut model);

    // Collect the check kinds present on the unit at `idx`.
    let kinds_of = |idx: usize| -> HashSet<AuthCheckKind> {
        model.units[idx]
            .auth_checks
            .iter()
            .map(|check| check.kind)
            .collect()
    };
    let mid_kinds = kinds_of(1);
    let leaf_kinds = kinds_of(2);

    assert!(
        mid_kinds.contains(&AuthCheckKind::Other),
        "mid helper should be authorized in round 1"
    );
    assert!(
        leaf_kinds.contains(&AuthCheckKind::Other),
        "leaf helper should be authorized in round 2 via the lifted mid"
    );
}
|
||||
|
||||
#[test]
fn caller_scope_propagation_does_not_seed_on_loginguard_only_route() {
    // The only route-level check on the handler is a LoginGuard, which is
    // excluded from seeding, so nothing may reach the helper.
    let mut handler = unit_with_calls(AnalysisUnitKind::RouteHandler, "list_things", &["_helper"]);
    handler
        .auth_checks
        .push(route_level_check(AuthCheckKind::LoginGuard));
    let helper = unit_with_calls(AnalysisUnitKind::Function, "_helper", &[]);

    let mut model = AuthorizationModel::default();
    model.units.push(handler);
    model.units.push(helper);

    apply_caller_scope_propagation(&mut model);

    let helper_checks = &model.units[1].auth_checks;
    assert!(
        helper_checks.is_empty(),
        "LoginGuard alone must not seed the helper"
    );
}
|
||||
|
||||
#[test]
fn caller_scope_propagation_skips_self_recursive_call() {
    // A helper whose only call edge points back at itself. The self-edge
    // is expected to be ignored when callers are collected, so the helper
    // has no in-file callers and must stay unauthorized.
    let mut model = AuthorizationModel::default();
    model.units.push(unit_with_calls(
        AnalysisUnitKind::Function,
        "recurse",
        &["recurse"],
    ));

    apply_caller_scope_propagation(&mut model);

    let recursive = &model.units[0];
    assert!(
        recursive.auth_checks.is_empty(),
        "self-recursive helper with no other callers must not be authorized"
    );
}
|
||||
|
||||
#[test]
|
||||
fn apply_var_types_leaves_classification_untouched_when_receiver_unknown() {
|
||||
let cfg = Config::default();
|
||||
|
|
|
|||
|
|
@ -367,6 +367,17 @@ pub struct AuthorizationModel {
|
|||
/// of the framework-request-name allow-list. Empty string when no
|
||||
/// language was supplied (single-file unit-test paths).
|
||||
pub lang: String,
|
||||
/// Cross-file router-dependency lift, keyed by **local** router
|
||||
/// variable name. Pre-populated by the orchestrator before
|
||||
/// extractors run, sourced from `GlobalSummaries.router_facts_by_module`
|
||||
/// for every project file whose `<parent>.include_router(<this_file>.<var>)`
|
||||
/// edge targets a router in the current file. FlaskExtractor merges
|
||||
/// these in alongside locally-declared `dependencies=[...]` so routes
|
||||
/// attached to a bare child router still inherit the parent's
|
||||
/// `Security(...)` / `Depends(...)` deps. Empty when no cross-file
|
||||
/// resolution applies (most files) or when global summaries are not
|
||||
/// available (unit-test / single-file scan paths).
|
||||
pub cross_file_router_deps: HashMap<String, Vec<(CallSite, bool)>>,
|
||||
}
|
||||
|
||||
impl AuthorizationModel {
|
||||
|
|
|
|||
516
src/auth_analysis/router_facts.rs
Normal file
516
src/auth_analysis/router_facts.rs
Normal file
|
|
@ -0,0 +1,516 @@
|
|||
//! Cross-file FastAPI router-dependency tracking.
|
||||
//!
|
||||
//! FastAPI propagates `dependencies=[Security(...), Depends(...)]` declared
|
||||
//! at the router level onto every route attached to that router, including
|
||||
//! routes attached via cross-file `<parent>.include_router(<child>.router)`
|
||||
//! lifts. The per-file router-dep collector in
|
||||
//! `crate::auth_analysis::extract::flask::collect_router_level_dependencies`
|
||||
//! sees only the file under analysis, so a bare child router whose auth is
|
||||
//! declared on a parent router in `__init__.py` (canonical airflow shape) has
|
||||
//! no visible deps. This module captures the cross-file edges + parent
|
||||
//! declarations during pass 1 and resolves them into a per-child effective
|
||||
//! dep map for pass 2's auth analysis.
|
||||
//!
|
||||
//! Storage shape: per-Python-file [`PerFileRouterFacts`] with
|
||||
//! `local_router_deps` (the `<router> = X(deps=[…])` declarations
|
||||
//! visible in the file) and `include_router_edges` (the
|
||||
//! `<parent>.include_router(<child_module>.<child_var>, …)` calls).
|
||||
//! Persisted into `crate::summary::GlobalSummaries::router_facts_by_module`
|
||||
//! during pass 1 and resolved into the active file's
|
||||
//! [`crate::auth_analysis::model::AuthorizationModel::cross_file_router_deps`]
|
||||
//! at pass 2 entry.
|
||||
//!
|
||||
//! Module identity: file basename without `.py`. This is approximate (two
|
||||
//! files named `task_instances.py` in different packages would collide) but
|
||||
//! covers airflow-style codebases where include_router targets reference the
|
||||
//! child's module name directly (`task_instances.router`). Transitive lifts
|
||||
//! (`grandparent.include_router(parent); parent.include_router(child)`) are
|
||||
//! resolved by walking the index iteratively at lookup time.
|
||||
|
||||
use crate::auth_analysis::extract::common::{
|
||||
call_site_from_node, named_children, string_literal_value, text,
|
||||
};
|
||||
use crate::auth_analysis::model::CallSite;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
/// Per-file extracted router declarations + include_router edges.
|
||||
/// Persisted into `GlobalSummaries.router_facts_by_module` keyed by the
|
||||
/// file's [`module_id_for_path`]. Single-purpose: drives the cross-file
|
||||
/// router-dep resolution at pass 2 entry.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct PerFileRouterFacts {
|
||||
/// Local router var → declared inline `dependencies=[...]` deps.
|
||||
/// Mirrors `flask::collect_router_level_dependencies` output.
|
||||
pub local_router_deps: HashMap<String, Vec<(CallSite, bool)>>,
|
||||
/// `<parent>.include_router(<child_module>.<child_var>, ...)` edges
|
||||
/// observed in this file. Each edge specifies a parent router var
|
||||
/// (local to this file) and a child router identified by its
|
||||
/// module_id + var name. Cross-file lookups walk these.
|
||||
pub include_router_edges: Vec<RouterIncludeEdge>,
|
||||
}
|
||||
|
||||
/// One `<parent>.include_router(<child_module>.<child_var>, ...)` call.
///
/// The routes of the child router inherit whatever dependencies the
/// parent router declares.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RouterIncludeEdge {
    /// Name of the local variable holding the parent router, i.e. the
    /// router whose dependencies are lifted onto the child.
    pub parent_var: String,
    /// Module identifier of the file that declares the child router.
    pub child_module_id: String,
    /// Variable name of the child router inside that module.
    pub child_var: String,
}
|
||||
|
||||
/// Derive the cross-file lookup identifier for a file.
///
/// The identifier is the file stem (file name with its final extension
/// removed), so `/x/y/task_instances.py` becomes `task_instances`. That
/// matches the airflow shape
/// `from . import task_instances; …
/// authenticated_router.include_router(task_instances.router)`, where the
/// `include_router` argument names the child file by its own basename.
///
/// Returns `None` when:
/// * the path has no file stem or the stem is not valid UTF-8,
/// * the stem is empty, or
/// * the stem is `__init__` (parent files only emit edges and are never
///   looked up by name).
pub fn module_id_for_path(path: &Path) -> Option<String> {
    path.file_stem()
        .and_then(|stem| stem.to_str())
        .filter(|stem| !stem.is_empty() && *stem != "__init__")
        .map(str::to_owned)
}
|
||||
|
||||
/// Build the storage key under which a file's router facts are indexed.
///
/// The key is the **whole path** as given, converted lossily to UTF-8.
/// Its only job is to be unique per file within one scan. Shorter forms
/// (basename, or `<parent_dir>::__init__`) collide in real projects —
/// airflow alone ships 17 `routes/__init__.py` files across providers
/// and test trees — and a collision would let one file silently replace
/// another's `include_router` edges, breaking the cross-file lift for
/// whichever parent was overwritten.
///
/// Lookup is intentionally different: per the original design note,
/// [`crate::summary::GlobalSummaries::resolve_cross_file_router_deps`]
/// scans every stored entry and matches child references by the last
/// segment ([`module_id_for_path`]), so children that share a basename
/// accumulate deps from every matching parent.
///
/// Returns `None` only for an empty path.
pub fn module_id_for_storage(path: &Path) -> Option<String> {
    let lossy = path.to_string_lossy();
    if lossy.is_empty() {
        None
    } else {
        Some(lossy.into_owned())
    }
}
|
||||
|
||||
/// Extract router-level deps + include_router edges from a Python AST.
|
||||
/// Returns `None` for non-Python files; pass 1 callers must gate on the
|
||||
/// file's language slug before invoking. Empty facts (no routers and no
|
||||
/// edges) still return `Some(Default::default())` so callers can record
|
||||
/// an empty index entry without re-extracting.
|
||||
pub fn extract_router_facts_for_python(tree: &Tree, bytes: &[u8]) -> PerFileRouterFacts {
|
||||
let mut facts = PerFileRouterFacts::default();
|
||||
let root = tree.root_node();
|
||||
collect_local_router_deps(root, bytes, &mut facts.local_router_deps);
|
||||
collect_include_router_edges(root, bytes, &mut facts.include_router_edges);
|
||||
facts
|
||||
}
|
||||
|
||||
/// Walk the module root for top-level `<id> = <RouterCtor>(..., dependencies=[…])`
|
||||
/// assignments, mirroring
|
||||
/// [`crate::auth_analysis::extract::flask::collect_router_level_dependencies`].
|
||||
/// Reimplemented here to avoid an inter-module Visibility tangle and
|
||||
/// to keep this module self-contained — the router extractor is the
|
||||
/// single source of truth at FlaskExtractor::extract time, this module
|
||||
/// is a parallel collection path that runs in pass 1.
|
||||
fn collect_local_router_deps(
|
||||
root: Node<'_>,
|
||||
bytes: &[u8],
|
||||
out: &mut HashMap<String, Vec<(CallSite, bool)>>,
|
||||
) {
|
||||
for child in named_children(root) {
|
||||
let assign = match child.kind() {
|
||||
"expression_statement" => named_children(child).into_iter().next(),
|
||||
"assignment" => Some(child),
|
||||
_ => None,
|
||||
};
|
||||
let Some(assign) = assign else { continue };
|
||||
if assign.kind() != "assignment" {
|
||||
continue;
|
||||
}
|
||||
let Some(left) = assign.child_by_field_name("left") else {
|
||||
continue;
|
||||
};
|
||||
if left.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(right) = assign.child_by_field_name("right") else {
|
||||
continue;
|
||||
};
|
||||
if right.kind() != "call" {
|
||||
continue;
|
||||
}
|
||||
let Some(function) = right.child_by_field_name("function") else {
|
||||
continue;
|
||||
};
|
||||
let function_text = text(function, bytes);
|
||||
if !is_router_like_constructor(&function_text) {
|
||||
continue;
|
||||
}
|
||||
let Some(arguments) = right.child_by_field_name("arguments") else {
|
||||
continue;
|
||||
};
|
||||
let Some(deps_value) = keyword_argument_value(arguments, bytes, "dependencies") else {
|
||||
continue;
|
||||
};
|
||||
let mut deps = Vec::new();
|
||||
for element in named_children(deps_value) {
|
||||
if let Some(unwrapped) = unwrap_depends_call(element, bytes) {
|
||||
deps.push(unwrapped);
|
||||
}
|
||||
}
|
||||
if deps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let var_name = text(left, bytes).trim().to_string();
|
||||
if var_name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
out.entry(var_name).or_insert(deps);
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk every call expression in the file looking for
|
||||
/// `<parent>.include_router(<child_module>.<child_var>, ...)` shapes.
|
||||
/// Records `(parent_var, child_module_id, child_var)` for each. Skips
|
||||
/// edges where the child reference is a bare identifier (no module
|
||||
/// segment) — those would require Python import resolution to attach
|
||||
/// to a specific file, beyond this single-hop basename matching.
|
||||
fn collect_include_router_edges(root: Node<'_>, bytes: &[u8], out: &mut Vec<RouterIncludeEdge>) {
|
||||
walk_for_include_router(root, bytes, out);
|
||||
}
|
||||
|
||||
fn walk_for_include_router(node: Node<'_>, bytes: &[u8], out: &mut Vec<RouterIncludeEdge>) {
|
||||
if node.kind() == "call"
|
||||
&& let Some(edge) = parse_include_router_call(node, bytes)
|
||||
{
|
||||
out.push(edge);
|
||||
}
|
||||
for child in named_children(node) {
|
||||
walk_for_include_router(child, bytes, out);
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_include_router_call(node: Node<'_>, bytes: &[u8]) -> Option<RouterIncludeEdge> {
|
||||
let function = node.child_by_field_name("function")?;
|
||||
if function.kind() != "attribute" {
|
||||
return None;
|
||||
}
|
||||
let attr = function.child_by_field_name("attribute")?;
|
||||
if text(attr, bytes) != "include_router" {
|
||||
return None;
|
||||
}
|
||||
let object = function.child_by_field_name("object")?;
|
||||
let parent_var = match object.kind() {
|
||||
"identifier" => text(object, bytes).trim().to_string(),
|
||||
_ => return None,
|
||||
};
|
||||
if parent_var.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let arguments = node.child_by_field_name("arguments")?;
|
||||
// First positional arg (skip keyword_argument children).
|
||||
let first = named_children(arguments)
|
||||
.into_iter()
|
||||
.find(|child| child.kind() != "keyword_argument")?;
|
||||
if first.kind() != "attribute" {
|
||||
return None;
|
||||
}
|
||||
let child_attr = first.child_by_field_name("attribute")?;
|
||||
let child_var = text(child_attr, bytes).trim().to_string();
|
||||
if child_var.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let child_object = first.child_by_field_name("object")?;
|
||||
// Use the **last segment** of a possibly-dotted module reference as
|
||||
// the cross-file module id. `task_instances.router` →
|
||||
// module_id="task_instances"; `pkg.task_instances.router` →
|
||||
// module_id="task_instances" (last attribute segment).
|
||||
let child_module_id = match child_object.kind() {
|
||||
"identifier" => text(child_object, bytes).trim().to_string(),
|
||||
"attribute" => {
|
||||
let inner_attr = child_object.child_by_field_name("attribute")?;
|
||||
text(inner_attr, bytes).trim().to_string()
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
if child_module_id.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(RouterIncludeEdge {
|
||||
parent_var,
|
||||
child_module_id,
|
||||
child_var,
|
||||
})
|
||||
}
|
||||
|
||||
fn keyword_argument_value<'tree>(
|
||||
arguments: Node<'tree>,
|
||||
bytes: &[u8],
|
||||
name: &str,
|
||||
) -> Option<Node<'tree>> {
|
||||
for arg in named_children(arguments) {
|
||||
if arg.kind() != "keyword_argument" {
|
||||
continue;
|
||||
}
|
||||
let key = arg.child_by_field_name("name")?;
|
||||
if text(key, bytes) == name {
|
||||
return arg.child_by_field_name("value");
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide whether `callee` names a router-like constructor.
///
/// Only the segment after the last `.` is considered (surrounding
/// whitespace is ignored). It matches when it is exactly `APIRouter`,
/// `FastAPI` or `VersionedAPIRouter`, or when it starts with an ASCII
/// uppercase letter, ends in `Router`, and is longer than `Router` itself.
///
/// This is a local twin of
/// [`crate::auth_analysis::extract::flask::is_router_like_constructor`],
/// duplicated to avoid the visibility tangle.
fn is_router_like_constructor(callee: &str) -> bool {
    let qualified = callee.trim();
    let last_segment = match qualified.rfind('.') {
        Some(dot) => &qualified[dot + 1..],
        None => qualified,
    };

    if matches!(last_segment, "APIRouter" | "FastAPI" | "VersionedAPIRouter") {
        return true;
    }

    // A non-empty prefix before the `Router` suffix is equivalent to
    // "strictly longer than `Router`"; its first char is the name's first.
    last_segment
        .strip_suffix("Router")
        .and_then(|prefix| prefix.chars().next())
        .is_some_and(|first| first.is_ascii_uppercase())
}
|
||||
|
||||
/// Cross-file dep-marker unwrapper. Differs from the in-file
|
||||
/// [`crate::auth_analysis::extract::flask::unwrap_depends_call`] in
|
||||
/// the *scoped-security* gating policy:
|
||||
///
|
||||
/// * **In-file** (per-route or per-router declarations visible to
|
||||
/// the active file's FlaskExtractor): only `Security(callable,
|
||||
/// scopes=[non-empty])` flips `scoped_security = true`. A bare
|
||||
/// `Security(callable)` stays as a LoginGuard — conservative because
|
||||
/// per-route bare Security is often used for login-only deps.
|
||||
///
|
||||
/// * **Cross-file via `include_router`** (this function, persisted
|
||||
/// into the project-wide router-facts index for the cross-file lift):
|
||||
/// ANY `Security(...)` marker at the parent-router level flips
|
||||
/// `scoped_security = true`, regardless of explicit `scopes=[...]`.
|
||||
/// Rationale: the FastAPI architectural pattern
|
||||
/// `parent_router = APIRouter(dependencies=[Security(callable)])`
|
||||
/// followed by `parent_router.include_router(child_router, ...)` is
|
||||
/// structurally a declaration that **every route under the child
|
||||
/// router is auth-protected**. Treating it as authorization (Other
|
||||
/// AuthCheckKind, via the existing `inject_middleware_auth` scoped
|
||||
/// promotion) is semantically correct — the developer's `Security`
|
||||
/// marker placement IS the authorization signal. Bare `Depends(...)`
|
||||
/// at the parent-router level is NOT promoted (it's a generic dep,
|
||||
/// often a login fetcher).
|
||||
fn unwrap_depends_call(node: Node<'_>, bytes: &[u8]) -> Option<(CallSite, bool)> {
|
||||
if node.kind() != "call" {
|
||||
return None;
|
||||
}
|
||||
let function = node.child_by_field_name("function")?;
|
||||
let function_text = text(function, bytes);
|
||||
if !is_dep_marker_callee(&function_text) {
|
||||
return None;
|
||||
}
|
||||
let is_security = is_security_marker(&function_text);
|
||||
let arguments = node.child_by_field_name("arguments")?;
|
||||
let children = named_children(arguments);
|
||||
let first = children
|
||||
.iter()
|
||||
.copied()
|
||||
.find(|child| child.kind() != "keyword_argument")?;
|
||||
// Cross-file scoped policy: any Security marker at parent-router
|
||||
// level → scoped=true. See doc comment above for rationale.
|
||||
let scoped_security = is_security;
|
||||
let _ = string_literal_value;
|
||||
let _ = keyword_argument_value;
|
||||
match first.kind() {
|
||||
"call" => Some((call_site_from_node(first, bytes), scoped_security)),
|
||||
"identifier" | "attribute" | "scoped_identifier" => {
|
||||
Some((call_site_from_node(first, bytes), scoped_security))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Report whether `callee` (ignoring surrounding whitespace) is one of the
/// recognised FastAPI dependency markers: `Depends` or `Security`, either
/// bare or qualified with `fastapi.` / `fastapi.params.`.
fn is_dep_marker_callee(callee: &str) -> bool {
    const DEP_MARKERS: [&str; 6] = [
        "Depends",
        "fastapi.Depends",
        "fastapi.params.Depends",
        "Security",
        "fastapi.Security",
        "fastapi.params.Security",
    ];
    DEP_MARKERS.contains(&callee.trim())
}
|
||||
|
||||
/// Report whether `callee` (ignoring surrounding whitespace) is the
/// FastAPI `Security` marker, either bare or qualified with `fastapi.` /
/// `fastapi.params.`.
fn is_security_marker(callee: &str) -> bool {
    const SECURITY_MARKERS: [&str; 3] =
        ["Security", "fastapi.Security", "fastapi.params.Security"];
    SECURITY_MARKERS.contains(&callee.trim())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tree_sitter::Parser;

    /// Parse `source` with the tree-sitter Python grammar.
    fn parse_python(source: &str) -> Tree {
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE))
            .expect("python language");
        parser.parse(source, None).expect("parse")
    }

    /// Parse `source` and run the pass-1 router-facts extractor over it.
    fn facts_for(source: &str) -> PerFileRouterFacts {
        let tree = parse_python(source);
        extract_router_facts_for_python(&tree, source.as_bytes())
    }

    #[test]
    fn module_id_for_path_strips_py_extension() {
        assert_eq!(
            module_id_for_path(Path::new("/x/y/task_instances.py")),
            Some("task_instances".into())
        );
        // `__init__` returns None — parent files are storage-only, not
        // lookup keys.
        assert_eq!(module_id_for_path(Path::new("/x/y/__init__.py")), None);
    }

    #[test]
    fn module_id_for_storage_uses_full_path_to_avoid_basename_collisions() {
        // Different `routes/__init__.py` files in different packages
        // must produce DIFFERENT keys — basename / parent-dir keying
        // would collide on real codebases (airflow alone has 17
        // `routes/__init__.py` files across its provider tree).
        let a = module_id_for_storage(Path::new(
            "/x/airflow-core/src/airflow/api_fastapi/execution_api/routes/__init__.py",
        ))
        .unwrap();
        let b = module_id_for_storage(Path::new(
            "/x/airflow-core/src/airflow/api_fastapi/core_api/routes/__init__.py",
        ))
        .unwrap();
        assert_ne!(a, b);
    }

    /// Canonical airflow shape — `routes/__init__.py` declares
    /// `authenticated_router = VersionedAPIRouter(dependencies=[Security(require_auth)])`
    /// and lifts every per-file child router via `include_router(...)`.
    /// Pass 1 must capture both the parent's local deps and the edges
    /// targeting `task_instances.router` / `dag_runs.router`. Cross-file
    /// Security wrappers (regardless of explicit `scopes=[...]`) are
    /// flagged scoped — the architectural intent of
    /// `parent_router = X(dependencies=[Security(callable)])` followed by
    /// `parent_router.include_router(child_router)` is auth scoping over
    /// every child route. See the `unwrap_depends_call` doc comment for
    /// the policy rationale.
    #[test]
    fn extract_router_facts_captures_parent_and_edges() {
        let src = "from cadwyn import VersionedAPIRouter\n\
                   from fastapi import APIRouter, Security\n\
                   from . import task_instances, dag_runs\n\
                   from .security import require_auth\n\
                   \n\
                   execution_api_router = APIRouter()\n\
                   authenticated_router = VersionedAPIRouter(dependencies=[Security(require_auth)])\n\
                   \n\
                   authenticated_router.include_router(task_instances.router, prefix=\"/task-instances\")\n\
                   authenticated_router.include_router(dag_runs.router, prefix=\"/dag-runs\")\n\
                   execution_api_router.include_router(authenticated_router)\n";
        let facts = facts_for(src);

        let parent_deps = facts
            .local_router_deps
            .get("authenticated_router")
            .expect("authenticated_router deps captured");
        assert_eq!(parent_deps.len(), 1);
        let (site, scoped) = &parent_deps[0];
        assert_eq!(site.name, "require_auth");
        assert!(
            *scoped,
            "cross-file: any Security marker is scoped-equivalent"
        );

        // execution_api_router has no deps → no entry.
        assert!(!facts.local_router_deps.contains_key("execution_api_router"));

        // Exactly the two module-qualified child edges are recorded. The
        // trailing `execution_api_router.include_router(authenticated_router)`
        // call passes a bare identifier and therefore yields no edge.
        let has_edge = |module: &str| {
            facts.include_router_edges.iter().any(|e| {
                e.parent_var == "authenticated_router"
                    && e.child_module_id == module
                    && e.child_var == "router"
            })
        };
        assert!(has_edge("task_instances"));
        assert!(has_edge("dag_runs"));
        assert_eq!(
            facts.include_router_edges.len(),
            2,
            "bare-identifier include_router call must not add an edge"
        );
    }

    /// `<parent>.include_router(<bare_var>)` — child reference is a bare
    /// identifier, no module segment. Cannot resolve to a specific
    /// file, so no edge is emitted. This includes the canonical
    /// `execution_api_router.include_router(authenticated_router)` chain
    /// where the child is a sibling router declared in the same file —
    /// transitive in-file lifts are handled by the local-deps map, not
    /// the cross-file edge list.
    #[test]
    fn extract_router_facts_skips_bare_identifier_child_refs() {
        let src = "outer = APIRouter()\nouter.include_router(authenticated_router)\n";
        let facts = facts_for(src);
        assert!(facts.include_router_edges.is_empty());
    }

    /// Scoped Security at the parent level (real-world airflow
    /// `ti_id_router` flavor). The `scoped` flag must round-trip.
    #[test]
    fn extract_router_facts_picks_up_scoped_security() {
        let src = "ti_id_router = VersionedAPIRouter(\n    route_class=ExecutionAPIRoute,\n    dependencies=[\n        Security(require_auth, scopes=[\"ti:self\"]),\n    ],\n)\n";
        let facts = facts_for(src);
        let deps = facts
            .local_router_deps
            .get("ti_id_router")
            .expect("ti_id_router deps captured");
        let (_site, scoped) = &deps[0];
        assert!(*scoped, "scopes=[\"ti:self\"] must mark scoped");
    }

    /// Cross-file `Depends(callable)` at parent-router level is NOT
    /// scoped — the policy promotes only Security markers (which
    /// signal authorization intent), not generic Depends (which are
    /// often login fetchers). Bare `Depends(get_current_user)` lifted
    /// onto a child router via `include_router` stays as a LoginGuard
    /// on the child's per-route auth checks.
    #[test]
    fn extract_router_facts_does_not_promote_depends() {
        let src = "from fastapi import APIRouter, Depends\n\
                   v1 = APIRouter(dependencies=[Depends(get_current_user)])\n";
        let facts = facts_for(src);
        let deps = facts.local_router_deps.get("v1").expect("v1 deps captured");
        let (_site, scoped) = &deps[0];
        assert!(!*scoped, "Depends never scoped-security at cross-file lift");
    }
}
|
||||
|
|
@ -3116,3 +3116,97 @@ fn chained_method_call_rebinds_to_inner_gated_sink() {
|
|||
call inner-gate rebinding fired"
|
||||
);
|
||||
}
|
||||
|
||||
/// A ternary on the right-hand side is lowered by `build_ternary_diamond`
/// into a diamond CFG: the condition becomes control flow and the two
/// branches become data flow that meets at a phi. `push_node` classifies
/// a branch only by suffix/prefix matching on its text, so a
/// source-shaped member expression such as `req.query.lng` is missed (the
/// rule matcher is `req.query`, which is neither a suffix nor a prefix
/// match for `req.query.lng`). `lower_ternary_branch` therefore runs the
/// segment-strip-and-retry classifier over the branch AST to recover the
/// source label, the same way `pre_emit_arg_source_nodes` does for call
/// arguments.
///
/// Without that step, `let arr = cond ? req.query.lng : "";` lowers each
/// branch to a labelless Assign-with-empty-uses, the join phi sees no
/// taint, and downstream sinks miss the flow. Motivated by the
/// i18next-http-middleware advisory GHSA-jfgf-83c5-2c4m / CVE-2026-42353.
#[test]
fn js_ternary_branch_member_expression_classified_as_source() {
    let src = b"function h(req) { const arr = req.query.lng ? req.query.lng : ''; }";
    let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let (cfg, _entry) = parse_and_build(src, "javascript", ts_lang);

    // Look for any node that defines `arr` and carries a Source label.
    let found_source_branch = cfg.node_indices().any(|n| {
        let taint = &cfg[n].taint;
        taint.defines.as_deref() == Some("arr")
            && taint
                .labels
                .iter()
                .any(|l| matches!(l, crate::labels::DataLabel::Source(_)))
    });

    assert!(
        found_source_branch,
        "expected at least one ternary branch defining `arr` to carry a \
         Source label after segment-strip classification of `req.query.lng`"
    );
}
|
||||
|
||||
#[test]
fn js_ternary_branch_const_strings_have_no_source() {
    // Precision guard: when both ternary branches are string constants the
    // segment-strip pass must not invent a Source label. The label is only
    // added when first_member_label finds a genuinely source-shaped
    // expression in the branch AST.
    let src = b"function h(cond) { const x = cond ? 'a' : 'b'; }";
    let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let (cfg, _entry) = parse_and_build(src, "javascript", ts_lang);

    let defining_x = cfg
        .node_indices()
        .map(|n| &cfg[n])
        .filter(|info| info.taint.defines.as_deref() == Some("x"));
    for info in defining_x {
        let has_source = info
            .taint
            .labels
            .iter()
            .any(|l| matches!(l, crate::labels::DataLabel::Source(_)));
        assert!(
            !has_source,
            "constant-string ternary branch must not carry a Source label; \
             got labels = {:?}",
            info.taint.labels
        );
    }
}
|
||||
|
||||
#[test]
fn js_ternary_branch_subscript_source_classified() {
    // Same fix, subscript flavour: a branch such as `req.body['k']` is
    // expected to be classified through the subscript-expression arm of
    // first_member_label.
    let src = b"function h(req) { const x = req.body ? req.body['k'] : ''; }";
    let ts_lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let (cfg, _entry) = parse_and_build(src, "javascript", ts_lang);

    // Look for any node that defines `x` and carries a Source label.
    let found_source_branch = cfg.node_indices().any(|n| {
        let taint = &cfg[n].taint;
        taint.defines.as_deref() == Some("x")
            && taint
                .labels
                .iter()
                .any(|l| matches!(l, crate::labels::DataLabel::Source(_)))
    });

    assert!(
        found_source_branch,
        "expected ternary subscript branch defining `x` to carry a Source label"
    );
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use super::helpers::first_member_label;
|
||||
use super::{
|
||||
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
|
||||
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
|
||||
|
|
@ -349,6 +350,33 @@ pub(super) fn lower_ternary_branch<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// Bridge source recognition to ternary branches. push_node only does
|
||||
// suffix/prefix matching on the branch text, so a source-shaped member
|
||||
// expression like `req.query.lng` doesn't classify (the rule matcher
|
||||
// is `req.query`, which neither suffix-matches nor prefix-matches
|
||||
// `req.query.lng`). Run the segment-strip-and-retry classifier on
|
||||
// the branch AST to recover the source label, mirroring what
|
||||
// `pre_emit_arg_source_nodes` does for call arguments and what the
|
||||
// `Kind::CallWrapper | Kind::Assignment` gate at push_node:1827 does
|
||||
// for whole declarations. Without this, `let arr = cond ? req.query.lng
|
||||
// : "";` lowers each branch to a labelless Assign-with-empty-uses, the
|
||||
// join phi sees no taint, and downstream sinks miss the flow.
|
||||
if !g[node]
|
||||
.taint
|
||||
.labels
|
||||
.iter()
|
||||
.any(|l| matches!(l, DataLabel::Source(_)))
|
||||
{
|
||||
let extra = analysis_rules
|
||||
.map(|r| r.extra_labels.as_slice())
|
||||
.filter(|s| !s.is_empty());
|
||||
if let Some(found @ DataLabel::Source(_)) =
|
||||
first_member_label(branch_ast, lang, code, extra)
|
||||
{
|
||||
g[node].taint.labels.push(found);
|
||||
}
|
||||
}
|
||||
|
||||
connect_all(g, preds, node, pred_edge);
|
||||
vec![node]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -847,10 +847,18 @@ pub(super) fn detect_negation<'a>(
|
|||
};
|
||||
|
||||
// `!expr` appears as unary_expression, not_operator, or prefix_unary_expression
|
||||
// with a `!` or `not` operator child.
|
||||
// with a `!` or `not` operator child. PHP's tree-sitter grammar emits
|
||||
// `unary_op_expression` for unary `!` (and `-`/`+`/`~`) — without it,
|
||||
// `if (!validate($x))` carries `condition_negated=false` and the
|
||||
// True branch is treated as the validated path even though it is the
|
||||
// rejection path, leaving downstream sinks unsuppressed.
|
||||
let is_negation_wrapper = matches!(
|
||||
cond.kind(),
|
||||
"unary_expression" | "not_operator" | "prefix_unary_expression" | "unary_not"
|
||||
"unary_expression"
|
||||
| "not_operator"
|
||||
| "prefix_unary_expression"
|
||||
| "unary_not"
|
||||
| "unary_op_expression"
|
||||
);
|
||||
|
||||
if is_negation_wrapper {
|
||||
|
|
@ -3233,6 +3241,7 @@ pub(super) fn build_sub<'a>(
|
|||
| "not_operator"
|
||||
| "prefix_unary_expression"
|
||||
| "unary_not"
|
||||
| "unary_op_expression"
|
||||
)
|
||||
});
|
||||
|
||||
|
|
@ -3472,6 +3481,7 @@ pub(super) fn build_sub<'a>(
|
|||
| "not_operator"
|
||||
| "prefix_unary_expression"
|
||||
| "unary_not"
|
||||
| "unary_op_expression"
|
||||
)
|
||||
})
|
||||
.unwrap_or(false);
|
||||
|
|
|
|||
|
|
@ -463,6 +463,56 @@ fn sink_args_typed_safe(ctx: &AnalysisContext, sink: NodeIndex, sink_caps: Cap)
|
|||
type_facts_suppress(&values, sink_caps, type_facts)
|
||||
}
|
||||
|
||||
/// Suppress a `cfg-unguarded-sink` SQL_QUERY finding when any positional
|
||||
/// argument to the sink Call is provably a JPA / Hibernate Criteria query
|
||||
/// object ([`crate::ssa::type_facts::TypeKind::JpaCriteriaQuery`]).
|
||||
///
|
||||
/// Receiver values are deliberately excluded, the receiver of a JPA
|
||||
/// query method (`session.createQuery(cq)`, `em.createQuery(cq)`,
|
||||
/// `session.executeUpdate(cq)`) is the connection / EntityManager
|
||||
/// channel, never the SQL payload. Including the receiver in the type
|
||||
/// check would make this suppression unreachable since `Session` /
|
||||
/// `EntityManager` values are typed `Object` / `Unknown` and never
|
||||
/// `JpaCriteriaQuery` themselves.
|
||||
///
|
||||
/// Closes the dominant FP cluster across openmrs (169 of 216
|
||||
/// cfg-unguarded-sink), xwiki, and keycloak: Hibernate DAO methods
|
||||
/// build a `CriteriaQuery<Foo>` via `cb.createQuery(Foo.class)` +
|
||||
/// `Root` / `Predicate` API, then hand the query object to
|
||||
/// `session.createQuery(cq)` for execution. No string concatenation
|
||||
/// happens, JPA emits parameterized SQL by construction.
|
||||
fn sink_args_jpa_criteria_query_safe(
|
||||
ctx: &AnalysisContext,
|
||||
sink: NodeIndex,
|
||||
sink_caps: Cap,
|
||||
) -> bool {
|
||||
if !sink_caps.intersects(Cap::SQL_QUERY) {
|
||||
return false;
|
||||
}
|
||||
let Some(facts) = ctx.body_const_facts else {
|
||||
return false;
|
||||
};
|
||||
let Some(type_facts) = ctx.type_facts else {
|
||||
return false;
|
||||
};
|
||||
let Some(&sink_val) = facts.ssa.cfg_node_map.get(&sink) else {
|
||||
return false;
|
||||
};
|
||||
let Some(inst) = find_inst(&facts.ssa, sink_val) else {
|
||||
return false;
|
||||
};
|
||||
let SsaOp::Call { args, .. } = &inst.op else {
|
||||
return false;
|
||||
};
|
||||
let mut values: Vec<SsaValue> = Vec::new();
|
||||
for group in args {
|
||||
for v in group.iter() {
|
||||
values.push(*v);
|
||||
}
|
||||
}
|
||||
crate::ssa::type_facts::is_safe_query_object_arg(&values, sink_caps, type_facts)
|
||||
}
|
||||
|
||||
/// Walk the sink's Call SSA arguments and check whether every real argument
|
||||
/// resolves through a defining `SsaOp::Call` whose callee carries an SSA
|
||||
/// summary with `validated_params_to_return` covering every propagating
|
||||
|
|
@ -1210,6 +1260,17 @@ impl CfgAnalysis for UnguardedSink {
|
|||
continue;
|
||||
}
|
||||
|
||||
// JPA / Hibernate Criteria-query suppression: receiver-call SQL
|
||||
// sinks like `session.createQuery(cq)` / `em.executeUpdate(cq)`
|
||||
// are safe by construction when arg 0 is a structural Criteria
|
||||
// object built via `CriteriaBuilder` (returns parameterized
|
||||
// SQL). Receiver excluded from the check, the receiver is
|
||||
// never the payload. Closes openmrs / xwiki / keycloak
|
||||
// Hibernate-DAO FP cluster.
|
||||
if !has_taint && sink_args_jpa_criteria_query_safe(ctx, *sink, sink_caps) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Static-map suppression: the SSA value flowing into the sink is
|
||||
// proved by the static-HashMap-lookup idiom detector to be a
|
||||
// finite set of literals free of shell metacharacters. Mirrors
|
||||
|
|
|
|||
|
|
@ -88,7 +88,21 @@ pub struct BodyConstFacts {
|
|||
/// Perf-regression sentinel: total cumulative calls to
/// [`build_body_const_facts`] across the process lifetime.
///
/// Used by the `analyse_file_fused_large_go` criterion bench in
/// `benches/scan_bench.rs` to assert that the per-file
/// `ParsedFile::body_const_facts_cache` (in [`crate::ast`]) is collapsing
/// the per-body re-lowering (~149 calls per file expected; pre-cache was
/// ~447). The atomic increment is ~1 ns per call and disappears in the
/// noise of the multi-millisecond SSA lowering it gates.
#[doc(hidden)]
pub static BUILD_BODY_CONST_FACTS_CALLS: std::sync::atomic::AtomicU64 =
    std::sync::atomic::AtomicU64::new(0);

/// Lower a body to SSA and run constant propagation. Returns `None` when
/// lowering fails (empty CFG, invalid entry), callers treat absence as
/// "no SSA facts available" and fall back to the syntactic path.
///
/// Every call increments [`BUILD_BODY_CONST_FACTS_CALLS`].
|
||||
|
||||
pub fn build_body_const_facts(body: &crate::cfg::BodyCfg, lang: Lang) -> Option<BodyConstFacts> {
|
||||
BUILD_BODY_CONST_FACTS_CALLS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
let mut ssa = crate::ssa::lower_to_ssa_with_params(
|
||||
&body.graph,
|
||||
body.entry,
|
||||
|
|
|
|||
|
|
@ -1743,6 +1743,17 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
local_gs.insert_auth(key, auth_sum);
|
||||
}
|
||||
|
||||
// Insert per-Python-file router-dep facts so
|
||||
// pass 2's auth analysis can lift FastAPI
|
||||
// router-level `dependencies=[Security(...)]`
|
||||
// declarations across the
|
||||
// `<parent>.include_router(<this_file>.<router>,
|
||||
// ...)` boundary — the canonical airflow
|
||||
// execution-API auth shape.
|
||||
if let Some((module_id, facts)) = r.router_facts {
|
||||
local_gs.insert_router_facts(module_id, facts);
|
||||
}
|
||||
|
||||
// Record language for progress
|
||||
if let Some(p) = progress {
|
||||
if let Some(ref lang) = first_lang {
|
||||
|
|
|
|||
|
|
@ -185,6 +185,7 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
|
|||
TypeKind::HttpClient => 11,
|
||||
TypeKind::LocalCollection => 12,
|
||||
TypeKind::RequestBuilder => 13,
|
||||
TypeKind::JpaCriteriaQuery => 14,
|
||||
// the analysis DTO types carry per-field structural info that the
|
||||
// bitset domain can't represent. Collapse to Unknown so callers
|
||||
// still see "any type possible" rather than crashing on an
|
||||
|
|
@ -210,6 +211,7 @@ fn type_kind_from_index(idx: u32) -> Option<TypeKind> {
|
|||
11 => Some(TypeKind::HttpClient),
|
||||
12 => Some(TypeKind::LocalCollection),
|
||||
13 => Some(TypeKind::RequestBuilder),
|
||||
14 => Some(TypeKind::JpaCriteriaQuery),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -799,6 +799,33 @@ fn phase_c_auth_rules_for_lang(lang_slug: &str) -> Vec<RuntimeLabelRule> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Look up a *receiver-side* validator for the given callee name.
|
||||
///
|
||||
/// Returns `Some(cap)` when the callee is registered as a method-call
|
||||
/// validator that strips `cap` from its receiver (and other call
|
||||
/// equivalents) on success. Distinct from the `Sanitizer` label,
|
||||
/// which clears caps from the *return value*. Used by the Call
|
||||
/// transfer to model idioms like `path.relative_to(base)` whose
|
||||
/// observable effect on data flow is "the receiver is validated"
|
||||
/// rather than "the return value is sanitised".
|
||||
pub fn lookup_receiver_validator(lang: &str, callee: &str) -> Option<Cap> {
|
||||
let table: &[(&str, Cap)] = match lang {
|
||||
"python" | "py" => python::RECEIVER_VALIDATORS,
|
||||
_ => return None,
|
||||
};
|
||||
let head = callee.split(['(', '<']).next().unwrap_or(callee);
|
||||
let trimmed = head.trim().as_bytes();
|
||||
let normalized = normalize_chained_call(callee);
|
||||
let norm = normalized.as_bytes();
|
||||
for (name, cap) in table {
|
||||
let m = name.as_bytes();
|
||||
if match_suffix_cs(trimmed, m, false) || match_suffix_cs(norm, m, false) {
|
||||
return Some(*cap);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Public re-export used by `ParsedFile::from_source` to
|
||||
/// augment per-file rule sets when imports reveal frameworks that the
|
||||
/// manifest-level detector missed.
|
||||
|
|
@ -1471,6 +1498,26 @@ pub fn custom_rule_id(lang: &str, kind: &str, matchers: &[String]) -> String {
|
|||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
fn receiver_validator_python_relative_to() {
    // (language slug, callee text, expected capability)
    let cases = [
        // Bare method name fires.
        ("python", "relative_to", Some(Cap::FILE_IO)),
        // Dotted-method-call form (chained receiver).
        ("python", "filepath.relative_to", Some(Cap::FILE_IO)),
        // Languages without a registry entry return None.
        ("rust", "relative_to", None),
        ("javascript", "relative_to", None),
        // Unrelated callees return None.
        ("python", "resolve", None),
        ("python", "joinpath", None),
    ];
    for (lang, callee, expected) in cases {
        assert_eq!(lookup_receiver_validator(lang, callee), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn bare_method_name_strips_chain() {
|
||||
// No-dot input → returned as-is.
|
||||
|
|
|
|||
|
|
@ -133,10 +133,15 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// NOTE: `file_get_contents` can fetch URLs (SSRF vector) and local files (LFI vector).
|
||||
// As a Sink(SSRF) it only fires when the argument is tainted.
|
||||
// NOTE: `file_get_contents` and `fopen` can fetch URLs (SSRF vector) and
|
||||
// local files (LFI vector — `file://` scheme). As a Sink(SSRF) they only
|
||||
// fire when the argument is tainted. `fopen` is the canonical low-level
|
||||
// stream-opening API used by media-import / OEmbed / podcast pipelines
|
||||
// (CVE-2026-33486 in roadiz/documents wraps `fopen($url, 'r')` in a
|
||||
// public `DownloadedFile::fromUrl` static method that any authenticated
|
||||
// backend caller can drive with attacker-controlled URLs).
|
||||
LabelRule {
|
||||
matchers: &["file_get_contents", "curl_exec"],
|
||||
matchers: &["file_get_contents", "curl_exec", "fopen"],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -232,6 +237,11 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
"anonymous_function_creation_expression" => Kind::Function,
|
||||
"arrow_function" => Kind::Function,
|
||||
"class_declaration" => Kind::Block,
|
||||
"declaration_list" => Kind::Block,
|
||||
"interface_declaration" => Kind::Block,
|
||||
"trait_declaration" => Kind::Block,
|
||||
"enum_declaration" => Kind::Block,
|
||||
"enum_declaration_list" => Kind::Block,
|
||||
|
||||
// data-flow
|
||||
"function_call_expression" => Kind::CallFn,
|
||||
|
|
|
|||
|
|
@ -25,6 +25,10 @@ pub static RULES: &[LabelRule] = &[
|
|||
"request.url",
|
||||
"request.base_url",
|
||||
"request.host",
|
||||
"request.match_info",
|
||||
"request.rel_url",
|
||||
"request.query",
|
||||
"request.path",
|
||||
// Common alias: from flask import request as flask_request
|
||||
"flask_request.args",
|
||||
"flask_request.form",
|
||||
|
|
@ -227,7 +231,15 @@ pub static RULES: &[LabelRule] = &[
|
|||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["send_file", "send_from_directory"],
|
||||
matchers: &[
|
||||
"send_file",
|
||||
"send_from_directory",
|
||||
// aiohttp file response — sends file at the supplied path,
|
||||
// semantically identical to Flask's send_file (CVE-2024-23334).
|
||||
"FileResponse",
|
||||
"web.FileResponse",
|
||||
"aiohttp.web.FileResponse",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -274,6 +286,25 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Method-call validators that strip caps from their *receiver* (and
|
||||
/// any equivalence-class-shaped args) on success, instead of clearing
|
||||
/// the return value. Distinct from `RULES`'s `Sanitizer` label, which
|
||||
/// only clears the return — a poor fit for idioms whose effect is
|
||||
/// raise-on-failure rather than value-replacement.
|
||||
///
|
||||
/// Modeled idioms:
|
||||
///
|
||||
/// * `path.relative_to(base)` (pathlib) — raises `ValueError` if `path`
|
||||
/// is not under `base`. After a successful return, the receiver is
|
||||
/// path-contained in `base`. Strips `Cap::FILE_IO`. Motivated by
|
||||
/// CVE-2024-23334 (aiohttp StaticResource symlink-bypass) where the
|
||||
/// patched code calls `filepath.relative_to(self._directory)` inside
|
||||
/// a try/except and serves `filepath` afterwards.
|
||||
pub static RECEIVER_VALIDATORS: &[(&str, Cap)] = &[
|
||||
("relative_to", Cap::FILE_IO),
|
||||
(".relative_to", Cap::FILE_IO),
|
||||
];
|
||||
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
// Legacy single-kwarg gate retained for back-compat: Popen(cmd, shell=True).
|
||||
SinkGate {
|
||||
|
|
|
|||
|
|
@ -206,4 +206,26 @@ pub const PATTERNS: &[Pattern] = &[
|
|||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::High,
|
||||
},
|
||||
// Flask `make_response(<f-string-or-concat>)` reflection — Tier B
|
||||
// heuristic mirroring `py.sqli.execute_format` / `py.sqli.text_format`.
|
||||
// Catches CVE-2023-6568 (mlflow auth `create_user` reflected the
|
||||
// attacker-controlled `Content-Type` header into the response body
|
||||
// via `make_response(f"Invalid content type: '{content_type}'", 400)`)
|
||||
// and the equivalent `+`-concat shape. Recognises both bare
|
||||
// `make_response(...)` and `flask.make_response(...)`.
|
||||
Pattern {
|
||||
id: "py.xss.make_response_format",
|
||||
description: "flask make_response with f-string or concat risks reflected XSS",
|
||||
query: r#"(call
|
||||
function: [(identifier) @fn (attribute attribute: (identifier) @fn)]
|
||||
(#eq? @fn "make_response")
|
||||
arguments: (argument_list
|
||||
[(binary_operator)
|
||||
(string (interpolation))] @arg))
|
||||
@vuln"#,
|
||||
severity: Severity::Medium,
|
||||
tier: PatternTier::B,
|
||||
category: PatternCategory::Xss,
|
||||
confidence: Confidence::Medium,
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1180,6 +1180,7 @@ fn type_kind_tag(k: &TypeKind) -> String {
|
|||
TypeKind::HttpClient => "HttpClient".into(),
|
||||
TypeKind::LocalCollection => "LocalCollection".into(),
|
||||
TypeKind::RequestBuilder => "RequestBuilder".into(),
|
||||
TypeKind::JpaCriteriaQuery => "JpaCriteriaQuery".into(),
|
||||
TypeKind::Dto(_) => "Dto".into(),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use super::ir::*;
|
||||
|
||||
|
|
@ -96,40 +97,56 @@ pub struct ConstPropResult {
|
|||
}
|
||||
|
||||
/// Run Sparse Conditional Constant Propagation on an SSA body.
|
||||
///
|
||||
/// Internal storage is dense `Vec`-indexed by [`SsaValue`] / [`BlockId`] to
|
||||
/// avoid the per-lookup `SipHash` cost of `HashMap<SsaValue, _>` /
|
||||
/// `HashSet<(BlockId, BlockId)>` that previously dominated the inner
|
||||
/// fixed-point loop. The public [`ConstPropResult`] still exposes the
|
||||
/// `HashMap`-shaped contract; the conversion at the end of the function is
|
||||
/// O(num_values) and runs once.
|
||||
pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
||||
let num_blocks = body.blocks.len();
|
||||
let num_values = body.value_defs.len();
|
||||
|
||||
// Per-value lattice: starts at Top
|
||||
let mut values: HashMap<SsaValue, ConstLattice> = HashMap::new();
|
||||
// Dense per-value lattice (`Vec` indexed by `SsaValue.0`). All values
|
||||
// are defined by exactly one inst (phi or body), so initialising the
|
||||
// entire range to Top is equivalent to the previous per-inst insert
|
||||
// pass at strictly lower cost (no hashing).
|
||||
let mut values: Vec<ConstLattice> = vec![ConstLattice::Top; num_values];
|
||||
|
||||
// Executable flags per CFG edge (from_block, to_block)
|
||||
let mut executable_edges: HashSet<(BlockId, BlockId)> = HashSet::new();
|
||||
// Executable blocks
|
||||
let mut executable_blocks: HashSet<BlockId> = HashSet::new();
|
||||
// Per-block executability and per-(dest, pred) executable-edge bitmap.
|
||||
// Edges are stored as a per-destination list of executable predecessors
|
||||
// — phi evaluation only ever asks "is `(pred, this_block)` executable?",
|
||||
// so a tiny SmallVec scan over the dest's predecessors beats a
|
||||
// `HashSet<(BlockId, BlockId)>::contains` (which hashes a 64-bit pair
|
||||
// for every operand of every phi).
|
||||
let mut executable_blocks: Vec<bool> = vec![false; num_blocks];
|
||||
let mut executable_preds: Vec<SmallVec<[BlockId; 2]>> = vec![SmallVec::new(); num_blocks];
|
||||
|
||||
// Two worklists
|
||||
// Worklists
|
||||
let mut cfg_worklist: VecDeque<BlockId> = VecDeque::new();
|
||||
let mut ssa_worklist: VecDeque<SsaValue> = VecDeque::new();
|
||||
|
||||
// Mark entry executable
|
||||
executable_blocks.insert(body.entry);
|
||||
executable_blocks[body.entry.0 as usize] = true;
|
||||
cfg_worklist.push_back(body.entry);
|
||||
|
||||
// Build use-map: SsaValue → list of (BlockId, instruction index in block)
|
||||
// so we can propagate SSA value changes efficiently.
|
||||
let mut use_sites: HashMap<SsaValue, Vec<BlockId>> = HashMap::new();
|
||||
// Use-map: dense `Vec` indexed by `SsaValue.0`. Populated in a single
|
||||
// pass via the closure-based [`inst_uses_each`] helper, which avoids
|
||||
// the heap allocation of the prior `inst_uses() -> Vec<SsaValue>`
|
||||
// factory.
|
||||
let mut use_sites: Vec<SmallVec<[BlockId; 2]>> = vec![SmallVec::new(); num_values];
|
||||
for block in &body.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
for used_val in inst_uses(inst) {
|
||||
use_sites.entry(used_val).or_default().push(block.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize all values to Top
|
||||
for block in &body.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
values.insert(inst.value, ConstLattice::Top);
|
||||
inst_uses_each(inst, |used_val| {
|
||||
let idx = used_val.0 as usize;
|
||||
if idx < use_sites.len() {
|
||||
let bucket = &mut use_sites[idx];
|
||||
if bucket.last() != Some(&block.id) {
|
||||
bucket.push(block.id);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -144,10 +161,10 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
// Evaluate phis
|
||||
for phi in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &phi.op {
|
||||
let old = values.get(&phi.value).cloned().unwrap_or(ConstLattice::Top);
|
||||
let new_val = eval_phi(operands, &values, &executable_edges, block_id);
|
||||
let old = lookup(&values, phi.value);
|
||||
let new_val = eval_phi(operands, &values, &executable_preds, block_id);
|
||||
if new_val != old {
|
||||
values.insert(phi.value, new_val);
|
||||
store(&mut values, phi.value, new_val);
|
||||
ssa_worklist.push_back(phi.value);
|
||||
changed = true;
|
||||
}
|
||||
|
|
@ -156,13 +173,10 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
|
||||
// Evaluate body instructions
|
||||
for inst in &block.body {
|
||||
let old = values
|
||||
.get(&inst.value)
|
||||
.cloned()
|
||||
.unwrap_or(ConstLattice::Top);
|
||||
let old = lookup(&values, inst.value);
|
||||
let new_val = eval_inst(inst, &values);
|
||||
if new_val != old {
|
||||
values.insert(inst.value, new_val);
|
||||
store(&mut values, inst.value, new_val);
|
||||
ssa_worklist.push_back(inst.value);
|
||||
changed = true;
|
||||
}
|
||||
|
|
@ -173,7 +187,7 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
block,
|
||||
body,
|
||||
&values,
|
||||
&mut executable_edges,
|
||||
&mut executable_preds,
|
||||
&mut executable_blocks,
|
||||
&mut cfg_worklist,
|
||||
);
|
||||
|
|
@ -181,54 +195,57 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
|
||||
// Process SSA worklist
|
||||
while let Some(val) = ssa_worklist.pop_front() {
|
||||
if let Some(blocks) = use_sites.get(&val) {
|
||||
for &block_id in blocks {
|
||||
if !executable_blocks.contains(&block_id) {
|
||||
continue;
|
||||
}
|
||||
let block = body.block(block_id);
|
||||
|
||||
// Re-evaluate phis using this value
|
||||
for phi in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &phi.op
|
||||
&& operands.iter().any(|(_, v)| *v == val)
|
||||
{
|
||||
let old = values.get(&phi.value).cloned().unwrap_or(ConstLattice::Top);
|
||||
let new_val = eval_phi(operands, &values, &executable_edges, block_id);
|
||||
if new_val != old {
|
||||
values.insert(phi.value, new_val);
|
||||
ssa_worklist.push_back(phi.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate body instructions using this value
|
||||
for inst in &block.body {
|
||||
if inst_uses(inst).contains(&val) {
|
||||
let old = values
|
||||
.get(&inst.value)
|
||||
.cloned()
|
||||
.unwrap_or(ConstLattice::Top);
|
||||
let new_val = eval_inst(inst, &values);
|
||||
if new_val != old {
|
||||
values.insert(inst.value, new_val);
|
||||
ssa_worklist.push_back(inst.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate terminator if condition changed
|
||||
process_terminator(
|
||||
block,
|
||||
body,
|
||||
&values,
|
||||
&mut executable_edges,
|
||||
&mut executable_blocks,
|
||||
&mut cfg_worklist,
|
||||
);
|
||||
let val_idx = val.0 as usize;
|
||||
if val_idx >= use_sites.len() {
|
||||
continue;
|
||||
}
|
||||
// Snapshot the use-list so we can borrow `values` mutably
|
||||
// while iterating block ids. The list is short (typically
|
||||
// 1–3 blocks) so the clone is cheap.
|
||||
let use_blocks = use_sites[val_idx].clone();
|
||||
for block_id in use_blocks {
|
||||
if !executable_blocks[block_id.0 as usize] {
|
||||
continue;
|
||||
}
|
||||
let block = body.block(block_id);
|
||||
|
||||
// Re-evaluate phis using this value
|
||||
for phi in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &phi.op
|
||||
&& operands.iter().any(|(_, v)| *v == val)
|
||||
{
|
||||
let old = lookup(&values, phi.value);
|
||||
let new_val = eval_phi(operands, &values, &executable_preds, block_id);
|
||||
if new_val != old {
|
||||
store(&mut values, phi.value, new_val);
|
||||
ssa_worklist.push_back(phi.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate body instructions using this value
|
||||
for inst in &block.body {
|
||||
if inst_has_use(inst, val) {
|
||||
let old = lookup(&values, inst.value);
|
||||
let new_val = eval_inst(inst, &values);
|
||||
if new_val != old {
|
||||
store(&mut values, inst.value, new_val);
|
||||
ssa_worklist.push_back(inst.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate terminator if condition changed
|
||||
process_terminator(
|
||||
block,
|
||||
body,
|
||||
&values,
|
||||
&mut executable_preds,
|
||||
&mut executable_blocks,
|
||||
&mut cfg_worklist,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -237,44 +254,79 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
}
|
||||
}
|
||||
|
||||
// Compute unreachable blocks
|
||||
let unreachable_blocks: HashSet<BlockId> = (0..num_blocks)
|
||||
.map(|i| BlockId(i as u32))
|
||||
.filter(|bid| !executable_blocks.contains(bid))
|
||||
.collect();
|
||||
// Convert dense storage to the public `HashMap`-shaped result. Walks
|
||||
// the value vector exactly once. The unreachable-blocks set is small
|
||||
// (often empty), so building it from a linear scan is fine.
|
||||
let mut out_values: HashMap<SsaValue, ConstLattice> = HashMap::with_capacity(num_values);
|
||||
for (i, v) in values.into_iter().enumerate() {
|
||||
out_values.insert(SsaValue(i as u32), v);
|
||||
}
|
||||
let mut unreachable_blocks: HashSet<BlockId> = HashSet::new();
|
||||
for (i, exec) in executable_blocks.iter().enumerate() {
|
||||
if !exec {
|
||||
unreachable_blocks.insert(BlockId(i as u32));
|
||||
}
|
||||
}
|
||||
|
||||
ConstPropResult {
|
||||
values,
|
||||
values: out_values,
|
||||
unreachable_blocks,
|
||||
}
|
||||
}
|
||||
|
||||
/// Dense lattice lookup. Returns Top for out-of-range values to match the
|
||||
/// pre-refactor `HashMap::get(&v).cloned().unwrap_or(Top)` semantics.
|
||||
#[inline]
|
||||
fn lookup(values: &[ConstLattice], v: SsaValue) -> ConstLattice {
|
||||
values
|
||||
.get(v.0 as usize)
|
||||
.cloned()
|
||||
.unwrap_or(ConstLattice::Top)
|
||||
}
|
||||
|
||||
/// Dense lattice store. Out-of-range writes are silently dropped to
|
||||
/// preserve robustness against malformed SSA input — the prior HashMap
|
||||
/// path would have inserted a stray entry; the dense path leaves it
|
||||
/// implicit (Top). Either way the value is unobservable downstream
|
||||
/// because no use-map entry would point at it.
|
||||
#[inline]
|
||||
fn store(values: &mut [ConstLattice], v: SsaValue, val: ConstLattice) {
|
||||
let idx = v.0 as usize;
|
||||
if idx < values.len() {
|
||||
values[idx] = val;
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate a phi: meet of operands from executable predecessors.
|
||||
fn eval_phi(
|
||||
operands: &[(BlockId, SsaValue)],
|
||||
values: &HashMap<SsaValue, ConstLattice>,
|
||||
executable_edges: &HashSet<(BlockId, BlockId)>,
|
||||
values: &[ConstLattice],
|
||||
executable_preds: &[SmallVec<[BlockId; 2]>],
|
||||
this_block: BlockId,
|
||||
) -> ConstLattice {
|
||||
let preds = executable_preds
|
||||
.get(this_block.0 as usize)
|
||||
.map(|p| p.as_slice())
|
||||
.unwrap_or(&[]);
|
||||
let mut result = ConstLattice::Top;
|
||||
for (pred_block, val) in operands {
|
||||
if !executable_edges.contains(&(*pred_block, this_block)) {
|
||||
if !preds.contains(pred_block) {
|
||||
continue; // skip non-executable predecessors
|
||||
}
|
||||
let operand_val = values.get(val).cloned().unwrap_or(ConstLattice::Top);
|
||||
let operand_val = lookup(values, *val);
|
||||
result = result.meet(&operand_val);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Evaluate a single instruction.
|
||||
fn eval_inst(inst: &SsaInst, values: &HashMap<SsaValue, ConstLattice>) -> ConstLattice {
|
||||
fn eval_inst(inst: &SsaInst, values: &[ConstLattice]) -> ConstLattice {
|
||||
match &inst.op {
|
||||
SsaOp::Const(Some(text)) => ConstLattice::parse(text),
|
||||
SsaOp::Const(None) => ConstLattice::Varying, // unknown constant
|
||||
SsaOp::Assign(uses) if uses.len() == 1 => {
|
||||
// Copy: propagate the source's value
|
||||
values.get(&uses[0]).cloned().unwrap_or(ConstLattice::Top)
|
||||
lookup(values, uses[0])
|
||||
}
|
||||
SsaOp::Assign(_) => ConstLattice::Varying, // expression with multiple uses
|
||||
SsaOp::Call { .. }
|
||||
|
|
@ -297,29 +349,69 @@ fn eval_inst(inst: &SsaInst, values: &HashMap<SsaValue, ConstLattice>) -> ConstL
|
|||
}
|
||||
}
|
||||
|
||||
/// Collect SSA values used by an instruction (for use-map building).
|
||||
fn inst_uses(inst: &SsaInst) -> Vec<SsaValue> {
|
||||
/// Apply a closure to every SSA value used by an instruction. Avoids the
|
||||
/// `Vec<SsaValue>` heap allocation that the previous `inst_uses(inst)`
|
||||
/// helper paid on every call (use-map build is O(num_insts), the prior
|
||||
/// path bottle-necked there).
|
||||
#[inline]
|
||||
fn inst_uses_each<F: FnMut(SsaValue)>(inst: &SsaInst, mut f: F) {
|
||||
match &inst.op {
|
||||
SsaOp::Phi(operands) => operands.iter().map(|(_, v)| *v).collect(),
|
||||
SsaOp::Assign(uses) => uses.to_vec(),
|
||||
SsaOp::Phi(operands) => {
|
||||
for (_, v) in operands {
|
||||
f(*v);
|
||||
}
|
||||
}
|
||||
SsaOp::Assign(uses) => {
|
||||
for v in uses {
|
||||
f(*v);
|
||||
}
|
||||
}
|
||||
SsaOp::Call { args, receiver, .. } => {
|
||||
let mut vals = Vec::new();
|
||||
if let Some(rv) = receiver {
|
||||
vals.push(*rv);
|
||||
f(*rv);
|
||||
}
|
||||
for arg in args {
|
||||
vals.extend(arg.iter());
|
||||
for v in arg {
|
||||
f(*v);
|
||||
}
|
||||
}
|
||||
vals
|
||||
}
|
||||
SsaOp::FieldProj { receiver, .. } => vec![*receiver],
|
||||
SsaOp::FieldProj { receiver, .. } => f(*receiver),
|
||||
SsaOp::Source
|
||||
| SsaOp::Const(_)
|
||||
| SsaOp::Param { .. }
|
||||
| SsaOp::SelfParam
|
||||
| SsaOp::CatchParam
|
||||
| SsaOp::Nop
|
||||
| SsaOp::Undef => Vec::new(),
|
||||
| SsaOp::Undef => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Zero-allocation predicate: does `inst` use `target` as an operand?
|
||||
/// Replaces the prior `inst_uses(inst).contains(&target)` shape, which
|
||||
/// allocated a fresh `Vec<SsaValue>` on every check inside the SCCP
|
||||
/// re-evaluation worklist.
|
||||
#[inline]
|
||||
fn inst_has_use(inst: &SsaInst, target: SsaValue) -> bool {
|
||||
match &inst.op {
|
||||
SsaOp::Phi(operands) => operands.iter().any(|(_, v)| *v == target),
|
||||
SsaOp::Assign(uses) => uses.contains(&target),
|
||||
SsaOp::Call { args, receiver, .. } => {
|
||||
if let Some(rv) = receiver
|
||||
&& *rv == target
|
||||
{
|
||||
return true;
|
||||
}
|
||||
args.iter().any(|arg| arg.contains(&target))
|
||||
}
|
||||
SsaOp::FieldProj { receiver, .. } => *receiver == target,
|
||||
SsaOp::Source
|
||||
| SsaOp::Const(_)
|
||||
| SsaOp::Param { .. }
|
||||
| SsaOp::SelfParam
|
||||
| SsaOp::CatchParam
|
||||
| SsaOp::Nop
|
||||
| SsaOp::Undef => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -327,9 +419,9 @@ fn inst_uses(inst: &SsaInst) -> Vec<SsaValue> {
|
|||
fn process_terminator(
|
||||
block: &SsaBlock,
|
||||
body: &SsaBody,
|
||||
values: &HashMap<SsaValue, ConstLattice>,
|
||||
executable_edges: &mut HashSet<(BlockId, BlockId)>,
|
||||
executable_blocks: &mut HashSet<BlockId>,
|
||||
values: &[ConstLattice],
|
||||
executable_preds: &mut [SmallVec<[BlockId; 2]>],
|
||||
executable_blocks: &mut [bool],
|
||||
cfg_worklist: &mut VecDeque<BlockId>,
|
||||
) {
|
||||
match &block.terminator {
|
||||
|
|
@ -343,7 +435,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
target,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -359,7 +451,7 @@ fn process_terminator(
|
|||
let cond_val = body
|
||||
.cfg_node_map
|
||||
.get(cond)
|
||||
.and_then(|v| values.get(v))
|
||||
.map(|v| lookup(values, *v))
|
||||
.and_then(|c| c.as_bool());
|
||||
|
||||
match cond_val {
|
||||
|
|
@ -367,7 +459,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
*true_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -376,7 +468,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
*false_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -386,14 +478,14 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
*true_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
mark_edge_executable(
|
||||
block.id,
|
||||
*false_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -417,7 +509,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
target,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -432,7 +524,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
target,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -444,18 +536,27 @@ fn process_terminator(
|
|||
fn mark_edge_executable(
|
||||
from: BlockId,
|
||||
to: BlockId,
|
||||
executable_edges: &mut HashSet<(BlockId, BlockId)>,
|
||||
executable_blocks: &mut HashSet<BlockId>,
|
||||
executable_preds: &mut [SmallVec<[BlockId; 2]>],
|
||||
executable_blocks: &mut [bool],
|
||||
cfg_worklist: &mut VecDeque<BlockId>,
|
||||
) {
|
||||
if executable_edges.insert((from, to)) {
|
||||
if executable_blocks.insert(to) {
|
||||
cfg_worklist.push_back(to);
|
||||
} else {
|
||||
// Block already executable but new edge, re-evaluate phis
|
||||
cfg_worklist.push_back(to);
|
||||
}
|
||||
let to_idx = to.0 as usize;
|
||||
if to_idx >= executable_preds.len() {
|
||||
return;
|
||||
}
|
||||
let preds = &mut executable_preds[to_idx];
|
||||
if preds.contains(&from) {
|
||||
return;
|
||||
}
|
||||
preds.push(from);
|
||||
let was_already_exec = executable_blocks[to_idx];
|
||||
if !was_already_exec {
|
||||
executable_blocks[to_idx] = true;
|
||||
}
|
||||
// Always re-enqueue: either the block became newly reachable, or it
|
||||
// already was but a new predecessor edge means phi operands need
|
||||
// re-meeting against the now-executable predecessor.
|
||||
cfg_worklist.push_back(to);
|
||||
}
|
||||
|
||||
/// Apply constant propagation results: prune branches where condition is known constant.
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use super::ir::*;
|
|||
use crate::cfg::{BinOp, Cfg};
|
||||
use crate::symbol::Lang;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Inferred type kind for an SSA value.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
|
|
@ -40,6 +41,17 @@ pub enum TypeKind {
|
|||
/// `label_prefix`, never participates in label-based callee
|
||||
/// resolution.
|
||||
LocalCollection,
|
||||
/// A JPA / Hibernate Criteria API query object (`CriteriaQuery<T>`,
|
||||
/// `CriteriaUpdate<T>`, `CriteriaDelete<T>`, `Subquery<T>`,
|
||||
/// `TypedQuery<T>`). These objects are produced by the
|
||||
/// `CriteriaBuilder` and emit parameterized SQL when handed to
|
||||
/// `Session.createQuery(cq)` / `EntityManager.createQuery(cq)`. The
|
||||
/// argument is structural (predicate AST), not a string, so SQL
|
||||
/// injection cannot flow through it. Used to suppress the
|
||||
/// `cfg-unguarded-sink` finding on `session.createQuery(cq)` shapes
|
||||
/// where openmrs / xwiki / keycloak Hibernate DAOs build queries
|
||||
/// via `cb.createQuery(Foo.class)` + `Root` / `Predicate` API.
|
||||
JpaCriteriaQuery,
|
||||
/// A framework-injected DTO body whose field types are known.
|
||||
/// Populated when a parameter is recognised as a typed extractor and
|
||||
/// the DTO class / struct / Pydantic model is resolvable in scope.
|
||||
|
|
@ -86,6 +98,7 @@ impl TypeKind {
|
|||
Self::FileHandle => Some("FileHandle"),
|
||||
Self::Url => Some("URL"),
|
||||
Self::RequestBuilder => Some("RequestBuilder"),
|
||||
Self::JpaCriteriaQuery => Some("JpaCriteriaQuery"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -222,6 +235,111 @@ pub fn is_type_safe_for_sink(
|
|||
})
|
||||
}
|
||||
|
||||
/// Check whether any of the sink-arg SSA values is a structural query
|
||||
/// object that emits parameterized SQL by construction (currently the
|
||||
/// JPA / Hibernate Criteria API: `CriteriaQuery`, `CriteriaUpdate`,
|
||||
/// `CriteriaDelete`, `Subquery`, `TypedQuery`).
|
||||
///
|
||||
/// Used by both the SSA taint engine and the structural
|
||||
/// `cfg-unguarded-sink` analysis to suppress the SQL-injection finding
|
||||
/// on `session.createQuery(cq)` / `em.createQuery(cq)` / `executeUpdate`
|
||||
/// shapes where the argument is a Criteria object built via
|
||||
/// `CriteriaBuilder` rather than a string.
|
||||
///
|
||||
/// Returns `false` when `sink_caps` does not include `SQL_QUERY`, when
|
||||
/// `values` is empty, or when no value carries the
|
||||
/// [`TypeKind::JpaCriteriaQuery`] tag. Receiver values should be
|
||||
/// excluded by the caller, the receiver of a JPA query method is the
|
||||
/// `Session` / `EntityManager` channel, never the payload.
|
||||
pub fn is_safe_query_object_arg(
|
||||
values: &[SsaValue],
|
||||
sink_caps: crate::labels::Cap,
|
||||
type_facts: &TypeFactResult,
|
||||
) -> bool {
|
||||
use crate::labels::Cap;
|
||||
if !sink_caps.intersects(Cap::SQL_QUERY) {
|
||||
return false;
|
||||
}
|
||||
if values.is_empty() {
|
||||
return false;
|
||||
}
|
||||
values
|
||||
.iter()
|
||||
.any(|v| type_facts.is_type(*v, &TypeKind::JpaCriteriaQuery))
|
||||
}
|
||||
|
||||
/// Receiver-text-aware return-type inference for methods whose
|
||||
/// constructor mapping cannot be determined from the callee suffix
|
||||
/// alone.
|
||||
///
|
||||
/// The JPA `createQuery` suffix is overloaded between
|
||||
/// `CriteriaBuilder.createQuery(Class)` (returns `CriteriaQuery`, our
|
||||
/// safe-by-construction structural query object) and
|
||||
/// `Session.createQuery(String|Query)` (the executable-query
|
||||
/// constructor whose string overload IS a SQL sink). Class-literal
|
||||
/// arg shape (e.g. `Foo.class`) doesn't surface in `arg_uses` at the
|
||||
/// CFG layer, so we fall back to the receiver-text hint: if the
|
||||
/// callee path includes a `CriteriaBuilder` cast or a receiver
|
||||
/// variable named `cb` / `criteriaBuilder` / `builder`, treat the
|
||||
/// call as the criteria-builder overload.
|
||||
///
|
||||
/// Conservative: returns `None` for any other shape so
|
||||
/// [`constructor_type`] / `is_int_producing_callee` stay
|
||||
/// authoritative, and consumers see Unknown instead of a wrong
|
||||
/// type tag.
|
||||
///
|
||||
/// `_args` and `_consts` are kept on the signature so we can later
|
||||
/// add arg-shape narrowing when class-literal lowering captures
|
||||
/// `Foo.class` as an arg-use.
|
||||
fn arg_aware_call_type(
|
||||
lang: Lang,
|
||||
callee: &str,
|
||||
_args: &[SmallVec<[SsaValue; 2]>],
|
||||
_consts: &HashMap<SsaValue, ConstLattice>,
|
||||
) -> Option<TypeKind> {
|
||||
if !matches!(lang, Lang::Java) {
|
||||
return None;
|
||||
}
|
||||
let after_colons = callee.rsplit("::").next().unwrap_or(callee);
|
||||
let suffix = after_colons.rsplit('.').next().unwrap_or(after_colons);
|
||||
if suffix != "createQuery" {
|
||||
return None;
|
||||
}
|
||||
// Strip the trailing `.createQuery` segment and inspect the
|
||||
// receiver text for the criteria-builder hints. Conservative
|
||||
// text-level match, the SSA layer doesn't expose receiver-type
|
||||
// facts here yet.
|
||||
let prefix = callee.rsplit_once('.').map(|(p, _)| p).unwrap_or(callee);
|
||||
if prefix.contains("CriteriaBuilder") || receiver_is_criteria_builder(prefix) {
|
||||
Some(TypeKind::JpaCriteriaQuery)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// True when the receiver text identifies a CriteriaBuilder by
/// idiomatic naming (`cb`, `criteriaBuilder`, `criteria_builder`,
/// `builder`, `getCriteriaBuilder`).
///
/// Two whole-text hints are accepted anywhere in `receiver_text`:
/// the substring `getCriteriaBuilder()` and the substring `.cb.`.
/// Otherwise only the text after the last `)` is considered, and its
/// final segment (split on `.`, `:` or a space) must equal one of the
/// names above. A receiver that ends in `)` for any other reason
/// (e.g. a parenthesised cast) therefore yields `false` here.
fn receiver_is_criteria_builder(receiver_text: &str) -> bool {
    if receiver_text.contains("getCriteriaBuilder()") || receiver_text.contains(".cb.") {
        return true;
    }
    // Keep only what follows the last `)` (the whole text when there is
    // none), then drop surrounding whitespace and leading dots.
    let tail = receiver_text
        .rsplit_once(')')
        .map_or(receiver_text, |(_, after)| after)
        .trim()
        .trim_start_matches('.');
    let last_segment = tail
        .rsplit(['.', ':', ' '])
        .next()
        .unwrap_or(tail)
        .trim_matches(|c: char| c == '(' || c == ')');
    matches!(
        last_segment,
        "cb" | "criteriaBuilder" | "criteria_builder" | "builder" | "getCriteriaBuilder"
    )
}
|
||||
|
||||
/// Infer a type from a constructor, factory, or allocator call.
|
||||
///
|
||||
/// Maps known constructor/factory/allocator patterns to security-relevant
|
||||
|
|
@ -260,6 +378,20 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
"FileInputStream" | "FileOutputStream" | "FileReader" | "FileWriter"
|
||||
| "BufferedReader" | "BufferedWriter" => Some(TypeKind::FileHandle),
|
||||
"getWriter" | "getOutputStream" => Some(TypeKind::HttpResponse),
|
||||
// JPA / Hibernate Criteria API factory methods. These are
|
||||
// unambiguous: `createCriteriaUpdate` / `createCriteriaDelete`
|
||||
// / `createTupleQuery` / `subquery` exist only on
|
||||
// `CriteriaBuilder` / `CriteriaQuery` and always return a
|
||||
// structural query object. `createQuery` is overloaded
|
||||
// (`CriteriaBuilder.createQuery(Class)` returns
|
||||
// `CriteriaQuery`; `Session.createQuery(String)` returns
|
||||
// `Query`), so it's gated below in
|
||||
// [`infer_call_return_type_with_args`] on the arg-0 shape
|
||||
// (a class literal) so we don't conflate the executable-
|
||||
// query overload with the criteria builder.
|
||||
"createCriteriaUpdate" | "createCriteriaDelete" | "createTupleQuery" | "subquery" => {
|
||||
Some(TypeKind::JpaCriteriaQuery)
|
||||
}
|
||||
_ => None,
|
||||
},
|
||||
Lang::JavaScript | Lang::TypeScript => match suffix {
|
||||
|
|
@ -687,9 +819,13 @@ pub fn analyze_types_with_param_types(
|
|||
}
|
||||
SsaOp::SelfParam => TypeFact::from_kind(TypeKind::Object),
|
||||
SsaOp::CatchParam => TypeFact::from_kind(TypeKind::Object),
|
||||
SsaOp::Call { callee, .. } => {
|
||||
SsaOp::Call { callee, args, .. } => {
|
||||
if let Some(ty) = lang.and_then(|l| constructor_type(l, callee)) {
|
||||
TypeFact::from_kind(ty)
|
||||
} else if let Some(ty) =
|
||||
lang.and_then(|l| arg_aware_call_type(l, callee, args, consts))
|
||||
{
|
||||
TypeFact::from_kind(ty)
|
||||
} else if is_int_producing_callee(callee) {
|
||||
TypeFact::from_kind(TypeKind::Int)
|
||||
} else {
|
||||
|
|
@ -2227,4 +2363,171 @@ mod tests {
|
|||
&result
|
||||
));
|
||||
}
|
||||
|
||||
// ── JPA Criteria query suppression (Phase: real-repo openmrs FP) ───
|
||||
//
|
||||
// These tests pin the `TypeKind::JpaCriteriaQuery` variant + the
|
||||
// `is_safe_query_object_arg` predicate + the
|
||||
// `arg_aware_call_type` receiver-text recogniser. Together they
|
||||
// close the openmrs HibernateDAO `session.createQuery(cq)` FP
|
||||
// cluster (216 → 24 cfg-unguarded-sink in openmrs).
|
||||
|
||||
/// `JpaCriteriaQuery` carries a label_prefix so type-qualified
/// callee resolution can attach future rules.
#[test]
fn jpa_criteria_query_label_prefix() {
    assert_eq!(
        TypeKind::JpaCriteriaQuery.label_prefix(),
        Some("JpaCriteriaQuery")
    );
}
|
||||
|
||||
/// `is_safe_query_object_arg` suppresses SQL_QUERY when any
/// supplied value is a `JpaCriteriaQuery`. Receiver exclusion is
/// the caller's responsibility; here we just verify the predicate.
#[test]
fn safe_query_object_arg_suppresses_sql_query() {
    use crate::labels::Cap;
    let mut facts = HashMap::new();
    facts.insert(SsaValue(0), TypeFact::from_kind(TypeKind::JpaCriteriaQuery));
    let result = TypeFactResult { facts };
    assert!(is_safe_query_object_arg(
        &[SsaValue(0)],
        Cap::SQL_QUERY,
        &result
    ));
    // Other caps stay untouched.
    assert!(!is_safe_query_object_arg(
        &[SsaValue(0)],
        Cap::CODE_EXEC,
        &result
    ));
    // Unknown-typed values do not trigger.
    let mut facts2 = HashMap::new();
    facts2.insert(SsaValue(0), TypeFact::from_kind(TypeKind::Unknown));
    let result2 = TypeFactResult { facts: facts2 };
    assert!(!is_safe_query_object_arg(
        &[SsaValue(0)],
        Cap::SQL_QUERY,
        &result2
    ));
    // Empty slice never suppresses.
    assert!(!is_safe_query_object_arg(&[], Cap::SQL_QUERY, &result));
}
|
||||
|
||||
/// `is_safe_query_object_arg` fires when a Criteria value is mixed
/// in with values of other types: the predicate is `any`, not `all`.
#[test]
fn safe_query_object_arg_fires_with_mixed_args() {
    use crate::labels::Cap;
    let mut facts = HashMap::new();
    facts.insert(SsaValue(0), TypeFact::from_kind(TypeKind::JpaCriteriaQuery));
    facts.insert(SsaValue(1), TypeFact::from_kind(TypeKind::String));
    facts.insert(SsaValue(2), TypeFact::from_kind(TypeKind::Unknown));
    let result = TypeFactResult { facts };
    assert!(is_safe_query_object_arg(
        &[SsaValue(0), SsaValue(1), SsaValue(2)],
        Cap::SQL_QUERY,
        &result
    ));
}
|
||||
|
||||
/// `arg_aware_call_type` maps the JPA `cb.createQuery(...)` /
/// `criteriaBuilder.createQuery(...)` / `((CriteriaBuilder)
/// x).createQuery(...)` shapes to `JpaCriteriaQuery`, distinct
/// from the overloaded `session.createQuery(...)` /
/// `em.createQuery(...)` which stays `None` (the
/// executable-query overload).
#[test]
fn arg_aware_call_type_jpa_criteria_builder_recogniser() {
    let no_args: Vec<SmallVec<[SsaValue; 2]>> = vec![];
    let consts: HashMap<SsaValue, ConstLattice> = HashMap::new();
    // Receiver hint: bare `cb` ident.
    assert_eq!(
        arg_aware_call_type(Lang::Java, "cb.createQuery", &no_args, &consts),
        Some(TypeKind::JpaCriteriaQuery)
    );
    // Receiver hint: bare `criteriaBuilder` ident.
    assert_eq!(
        arg_aware_call_type(Lang::Java, "criteriaBuilder.createQuery", &no_args, &consts),
        Some(TypeKind::JpaCriteriaQuery)
    );
    // Cast in receiver text.
    assert_eq!(
        arg_aware_call_type(
            Lang::Java,
            "((CriteriaBuilder) cb).createQuery",
            &no_args,
            &consts
        ),
        Some(TypeKind::JpaCriteriaQuery)
    );
    // Chained accessor: getCriteriaBuilder().createQuery
    assert_eq!(
        arg_aware_call_type(
            Lang::Java,
            "session.getCriteriaBuilder().createQuery",
            &no_args,
            &consts
        ),
        Some(TypeKind::JpaCriteriaQuery)
    );
    // The executable-query overload (`session.createQuery`) does
    // NOT match: the receiver text carries no CriteriaBuilder
    // hint, so the type is left unset and the suppression decides
    // based on the arg-0 type fact.
    assert_eq!(
        arg_aware_call_type(Lang::Java, "session.createQuery", &no_args, &consts),
        None
    );
    assert_eq!(
        arg_aware_call_type(Lang::Java, "em.createQuery", &no_args, &consts),
        None
    );
    // Non-Java langs return None.
    assert_eq!(
        arg_aware_call_type(Lang::Python, "cb.createQuery", &no_args, &consts),
        None
    );
    // Other suffixes return None.
    assert_eq!(
        arg_aware_call_type(Lang::Java, "cb.createCriteriaUpdate", &no_args, &consts),
        None
    );
}
|
||||
|
||||
/// Unique-suffix Criteria API methods land on
/// `TypeKind::JpaCriteriaQuery` directly via [`constructor_type`]
/// without the receiver hint: `createCriteriaUpdate` /
/// `createCriteriaDelete` / `createTupleQuery` / `subquery` are
/// mapped by suffix alone, while the overloaded `createQuery` is not.
#[test]
fn constructor_type_unique_jpa_criteria_methods() {
    for suffix in &[
        "createCriteriaUpdate",
        "createCriteriaDelete",
        "createTupleQuery",
        "subquery",
    ] {
        assert_eq!(
            constructor_type(Lang::Java, suffix),
            Some(TypeKind::JpaCriteriaQuery),
            "suffix `{suffix}` must map to JpaCriteriaQuery"
        );
        // Same suffix prefixed by an arbitrary receiver still maps.
        assert_eq!(
            constructor_type(Lang::Java, &format!("cb.{suffix}")),
            Some(TypeKind::JpaCriteriaQuery)
        );
    }
    // Non-criteria methods unaffected.
    assert_eq!(
        constructor_type(Lang::Java, "session.createQuery"),
        None,
        "createQuery is overloaded — must not map at constructor_type level"
    );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ pub mod ssa_summary;
|
|||
use crate::labels::Cap;
|
||||
use crate::summary::ssa_summary::SsaFuncSummary;
|
||||
use crate::symbol::{FuncKey, FuncKind, Lang, normalize_namespace};
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
|
@ -517,15 +518,20 @@ impl<'a> CalleeQuery<'a> {
|
|||
/// for same-language resolution in the taint engine.
|
||||
#[derive(Default)]
|
||||
pub struct GlobalSummaries {
|
||||
by_key: HashMap<FuncKey, FuncSummary>,
|
||||
/// FxHashMap (rustc_hash) replaces stdlib SipHash. FuncKey carries 3
|
||||
/// String fields, so any HashMap operation walks ≥30 bytes through the
|
||||
/// hasher; FxHash is ~5x faster than SipHash on this workload. Seed
|
||||
/// is fixed (no DoS hardening), which is fine for an in-process index
|
||||
/// keyed by static program-derived names.
|
||||
by_key: FxHashMap<FuncKey, FuncSummary>,
|
||||
/// Bare leaf-name index, kept for compatibility with callers that only
|
||||
/// see an unqualified call string. A single name may map to many keys
|
||||
/// across containers / files / arities.
|
||||
by_lang_name: HashMap<(Lang, String), Vec<FuncKey>>,
|
||||
by_lang_name: FxHashMap<(Lang, String), Vec<FuncKey>>,
|
||||
/// Container-qualified index: keyed on `"{container}::{name}"` (or just
|
||||
/// `name` for free functions). Used to resolve calls when the call-site
|
||||
/// can supply a receiver / container hint (e.g. `OrderService::process`).
|
||||
by_lang_qualified: HashMap<(Lang, String), Vec<FuncKey>>,
|
||||
by_lang_qualified: FxHashMap<(Lang, String), Vec<FuncKey>>,
|
||||
/// Rust-only secondary index keyed on `(module_path, name)`.
|
||||
///
|
||||
/// Populated whenever a Rust [`FuncSummary`] is inserted with a
|
||||
|
|
@ -533,7 +539,7 @@ pub struct GlobalSummaries {
|
|||
/// candidates by their crate-relative module rather than their
|
||||
/// filesystem path. Same name / module / arity overloads land on the
|
||||
/// same vector, arity narrowing happens at resolution time.
|
||||
by_rust_module: HashMap<(String, String), Vec<FuncKey>>,
|
||||
by_rust_module: FxHashMap<(String, String), Vec<FuncKey>>,
|
||||
/// Precise SSA-derived per-parameter summaries, keyed by `FuncKey`.
|
||||
/// These take precedence over `FuncSummary` during callee resolution.
|
||||
ssa_by_key: HashMap<FuncKey, SsaFuncSummary>,
|
||||
|
|
@ -546,6 +552,18 @@ pub struct GlobalSummaries {
|
|||
/// pass 1 and consumed by
|
||||
/// [`crate::auth_analysis::run_auth_analysis`] during pass 2.
|
||||
auth_by_key: HashMap<FuncKey, crate::auth_analysis::model::AuthCheckSummary>,
|
||||
/// Per-Python-file router declarations + `include_router` edges,
|
||||
/// keyed by `module_id_for_storage(file_path)` (basename without
|
||||
/// `.py`, or `parent_dir::__init__` for `__init__.py`). Populated
|
||||
/// during pass 1 and consumed by
|
||||
/// [`Self::resolve_cross_file_router_deps`] at pass 2 entry to lift
|
||||
/// FastAPI router-level `dependencies=[Security(...)]` declared in a
|
||||
/// parent file (`__init__.py` calling
|
||||
/// `<parent>.include_router(<child>.router, ...)`) onto the bare
|
||||
/// child router declared in another file — closing the airflow
|
||||
/// execution-API auth-recognition gap on routes attached to bare
|
||||
/// child routers.
|
||||
router_facts_by_module: HashMap<String, crate::auth_analysis::router_facts::PerFileRouterFacts>,
|
||||
/// Type hierarchy index for runtime virtual-dispatch fan-out.
|
||||
///
|
||||
/// Installed by [`Self::install_hierarchy`] after pass 1 from the
|
||||
|
|
@ -856,6 +874,11 @@ impl GlobalSummaries {
|
|||
for (key, auth_sum) in other.auth_by_key {
|
||||
self.auth_by_key.insert(key, auth_sum);
|
||||
}
|
||||
// Router facts: last-writer-wins per (module_id) key. Re-analysing
|
||||
// a file produces a fresh snapshot of its router declarations + edges.
|
||||
for (module_id, facts) in other.router_facts_by_module {
|
||||
self.router_facts_by_module.insert(module_id, facts);
|
||||
}
|
||||
// Hierarchy index: invalidate after a merge so the next consumer
|
||||
// sees a freshly-built view that includes `other`'s edges. The
|
||||
// alternative, point-merging two indexes, is racy when the
|
||||
|
|
@ -991,6 +1014,80 @@ impl GlobalSummaries {
|
|||
self.auth_by_key.len()
|
||||
}
|
||||
|
||||
/// Insert a per-file `PerFileRouterFacts` snapshot. Last-writer-wins
|
||||
/// per `module_id` key — re-analysing a file produces a fresh
|
||||
/// snapshot of its router declarations and `include_router` edges.
|
||||
pub fn insert_router_facts(
|
||||
&mut self,
|
||||
module_id: String,
|
||||
facts: crate::auth_analysis::router_facts::PerFileRouterFacts,
|
||||
) {
|
||||
self.router_facts_by_module.insert(module_id, facts);
|
||||
}
|
||||
|
||||
/// Resolve cross-file router-level deps for the file identified by
|
||||
/// `child_module_id`. Walks every other file's persisted
|
||||
/// `RouterIncludeEdge` list, finds edges whose `child_module_id`
|
||||
/// matches, and accumulates the parent file's
|
||||
/// `local_router_deps[parent_var]` against `child_var` — producing
|
||||
/// a `<child_var> → Vec<(CallSite, scoped_security)>` map ready to
|
||||
/// merge into the active file's
|
||||
/// `AuthorizationModel.cross_file_router_deps`.
|
||||
///
|
||||
/// Single-hop only. Transitive lifts (`grandparent.include_router(parent);
|
||||
/// parent.include_router(child)`) are not currently resolved — the
|
||||
/// airflow shape that motivated this fix is single-hop, and adding
|
||||
/// transitive resolution is a follow-up that would also need to
|
||||
/// model the bare-identifier `outer.include_router(inner_router)`
|
||||
/// case which the extractor presently skips.
|
||||
///
|
||||
/// Returns an empty map when `child_module_id` matches no edges or
|
||||
/// when the index is empty.
|
||||
pub fn resolve_cross_file_router_deps(
|
||||
&self,
|
||||
child_module_id: &str,
|
||||
) -> HashMap<String, Vec<(crate::auth_analysis::model::CallSite, bool)>> {
|
||||
let mut out: HashMap<String, Vec<(crate::auth_analysis::model::CallSite, bool)>> =
|
||||
HashMap::new();
|
||||
if self.router_facts_by_module.is_empty() {
|
||||
return out;
|
||||
}
|
||||
for facts in self.router_facts_by_module.values() {
|
||||
for edge in &facts.include_router_edges {
|
||||
if edge.child_module_id != child_module_id {
|
||||
continue;
|
||||
}
|
||||
// Look up the parent's deps in the SAME file's
|
||||
// local_router_deps map (parent declarations and the
|
||||
// include_router edge live in the same file).
|
||||
let Some(parent_deps) = facts.local_router_deps.get(&edge.parent_var) else {
|
||||
continue;
|
||||
};
|
||||
if parent_deps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let entry = out.entry(edge.child_var.clone()).or_default();
|
||||
for dep in parent_deps {
|
||||
// Dedup by (callee name, scoped flag) so multiple
|
||||
// parents declaring the same dep don't double-fire.
|
||||
let already = entry
|
||||
.iter()
|
||||
.any(|(call, scoped)| call.name == dep.0.name && *scoped == dep.1);
|
||||
if !already {
|
||||
entry.push(dep.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Count of files that contributed router facts to the index.
|
||||
/// Exposed for `tracing::debug!` observability.
|
||||
pub fn router_facts_len(&self) -> usize {
|
||||
self.router_facts_by_module.len()
|
||||
}
|
||||
|
||||
/// Insert a cross-file callee body.
|
||||
///
|
||||
/// See [`insert_ssa`](Self::insert_ssa) for the identity-safety rule.
|
||||
|
|
@ -1050,7 +1147,10 @@ impl GlobalSummaries {
|
|||
|
||||
#[allow(dead_code)] // used by tests and future call-graph consumers
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.by_key.is_empty() && self.ssa_by_key.is_empty() && self.auth_by_key.is_empty()
|
||||
self.by_key.is_empty()
|
||||
&& self.ssa_by_key.is_empty()
|
||||
&& self.auth_by_key.is_empty()
|
||||
&& self.router_facts_by_module.is_empty()
|
||||
}
|
||||
|
||||
/// Iterate over all (key, summary) pairs.
|
||||
|
|
@ -1582,6 +1682,7 @@ impl std::fmt::Debug for GlobalSummaries {
|
|||
.field("ssa_len", &self.ssa_by_key.len())
|
||||
.field("bodies_len", &self.bodies_by_key.len())
|
||||
.field("auth_len", &self.auth_by_key.len())
|
||||
.field("router_facts_len", &self.router_facts_by_module.len())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3851,6 +3851,126 @@ fn cross_file_devirt_does_not_union_unrelated_findbyids() {
|
|||
assert_eq!(cache_sum.tainted_sink_params, vec![0]);
|
||||
}
|
||||
|
||||
/// Cross-file router-dep resolution: parent `__init__.py` declares
/// `Security(...)` deps on a router and lifts them onto a child via
/// `<parent>.include_router(<child_module>.<child_var>, ...)`. The
/// resolution must produce a `<child_var> → Vec<(CallSite, scoped)>`
/// map for the child file's `module_id`, and absent edges must yield
/// an empty map.
#[test]
fn resolve_cross_file_router_deps_lifts_parent_security_dep_onto_child_router() {
    use crate::auth_analysis::model::CallSite;
    use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge};

    let mut gs = GlobalSummaries::new();
    // Parent (__init__.py) declares scoped Security on `authenticated_router`
    // and emits two include_router edges (task_instances + dag_runs).
    let parent_callsite = CallSite {
        name: "require_auth".into(),
        args: Vec::new(),
        span: (0, 0),
        args_value_refs: Vec::new(),
    };
    let mut parent_facts = PerFileRouterFacts::default();
    parent_facts.local_router_deps.insert(
        "authenticated_router".into(),
        vec![(parent_callsite.clone(), true)],
    );
    parent_facts.include_router_edges.push(RouterIncludeEdge {
        parent_var: "authenticated_router".into(),
        child_module_id: "task_instances".into(),
        child_var: "router".into(),
    });
    parent_facts.include_router_edges.push(RouterIncludeEdge {
        parent_var: "authenticated_router".into(),
        child_module_id: "dag_runs".into(),
        child_var: "router".into(),
    });
    gs.insert_router_facts("routes::__init__".into(), parent_facts);

    // Child (task_instances.py) declares a bare router → expects to
    // inherit the parent's deps via the cross-file resolution.
    gs.insert_router_facts("task_instances".into(), PerFileRouterFacts::default());

    // Resolve for task_instances → should get one entry under `router`
    // carrying the require_auth (scoped=true) dep.
    let resolved = gs.resolve_cross_file_router_deps("task_instances");
    let deps = resolved.get("router").expect("router child resolved");
    assert_eq!(deps.len(), 1);
    assert_eq!(deps[0].0.name, "require_auth");
    assert!(deps[0].1, "scoped flag preserved");

    // dag_runs has the same parent → same lift.
    let resolved_dag = gs.resolve_cross_file_router_deps("dag_runs");
    assert_eq!(resolved_dag.get("router").map(|v| v.len()), Some(1));

    // Unrelated module → no lift.
    let resolved_other = gs.resolve_cross_file_router_deps("nonexistent");
    assert!(resolved_other.is_empty());
}
|
||||
|
||||
/// Edge case: a parent without local deps for the named var emits
/// nothing. The resolver requires both an edge AND a non-empty parent
/// dep list.
#[test]
fn resolve_cross_file_router_deps_skips_edges_with_no_parent_deps() {
    use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge};

    let mut gs = GlobalSummaries::new();
    let mut parent = PerFileRouterFacts::default();
    parent.include_router_edges.push(RouterIncludeEdge {
        parent_var: "ghost_router".into(),
        child_module_id: "child".into(),
        child_var: "router".into(),
    });
    gs.insert_router_facts("parent".into(), parent);

    let resolved = gs.resolve_cross_file_router_deps("child");
    assert!(resolved.is_empty());
}
|
||||
|
||||
/// Two parent files that each lift the same dep onto the same child
/// router must yield a single entry: deps are deduplicated by
/// (callee name, scoped flag).
#[test]
fn resolve_cross_file_router_deps_dedups_duplicate_parent_deps() {
    use crate::auth_analysis::model::CallSite;
    use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge};

    let cs = CallSite {
        name: "require_auth".into(),
        args: Vec::new(),
        span: (0, 0),
        args_value_refs: Vec::new(),
    };
    let mut gs = GlobalSummaries::new();

    // Parent A: include_router(child.router) with `require_auth` dep.
    let mut p_a = PerFileRouterFacts::default();
    p_a.local_router_deps
        .insert("router_a".into(), vec![(cs.clone(), true)]);
    p_a.include_router_edges.push(RouterIncludeEdge {
        parent_var: "router_a".into(),
        child_module_id: "child".into(),
        child_var: "router".into(),
    });
    gs.insert_router_facts("parent_a".into(), p_a);

    // Parent B: SAME dep, different parent file.
    let mut p_b = PerFileRouterFacts::default();
    p_b.local_router_deps
        .insert("router_b".into(), vec![(cs, true)]);
    p_b.include_router_edges.push(RouterIncludeEdge {
        parent_var: "router_b".into(),
        child_module_id: "child".into(),
        child_var: "router".into(),
    });
    gs.insert_router_facts("parent_b".into(), p_b);

    let resolved = gs.resolve_cross_file_router_deps("child");
    let deps = resolved.get("router").expect("router resolved");
    assert_eq!(deps.len(), 1, "duplicate (callee, scoped) deduplicated");
}
|
||||
|
||||
// ── the analysis ────────────────────
|
||||
//
|
||||
// `GlobalSummaries::resolve_callee_widened` is the runtime counterpart of
|
||||
|
|
|
|||
|
|
@ -211,6 +211,41 @@ fn is_bounded_length_check(lower: &str) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
/// Normalise an identifier to its snake-case lowercase form so that
/// camelCase / PascalCase / SCREAMING variants line up against snake-cased
/// prefix lists (`is_safe`, `is_authorized`, `is_authenticated`).
///
/// An underscore is inserted before an ASCII uppercase letter when:
/// - the previous character is an ASCII lowercase letter or digit
///   (`isSafe` → `is_safe`), or
/// - the previous character is ASCII uppercase and the next one is ASCII
///   lowercase, i.e. the end of an acronym (`HTTPClient` → `http_client`).
///
/// No underscore is added if the output already ends with one, so inputs
/// already in snake_case round-trip unchanged (`is_safe` → `is_safe`).
/// Every character is ASCII-lowercased; non-ASCII characters pass through
/// untouched.
pub(crate) fn to_snake_lower(s: &str) -> String {
    let mut snake = String::with_capacity(s.len() + 4);
    let mut prev: Option<char> = None;
    let mut rest = s.chars().peekable();
    // `peek` is needed for the acronym rule, so a plain `for` loop won't do.
    while let Some(ch) = rest.next() {
        if ch.is_ascii_uppercase() {
            if let Some(before) = prev {
                let camel_boundary = before.is_ascii_lowercase() || before.is_ascii_digit();
                let acronym_end = before.is_ascii_uppercase()
                    && rest.peek().is_some_and(|after| after.is_ascii_lowercase());
                if (camel_boundary || acronym_end) && !snake.ends_with('_') {
                    snake.push('_');
                }
            }
        }
        snake.push(ch.to_ascii_lowercase());
        prev = Some(ch);
    }
    snake
}
|
||||
|
||||
/// Parse a leading non-negative integer literal (decimal only).
|
||||
fn parse_leading_uint(s: &str) -> Option<u64> {
|
||||
let mut n: u64 = 0;
|
||||
|
|
@ -384,13 +419,35 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
.unwrap_or(callee_part)
|
||||
.trim();
|
||||
|
||||
// Derive a snake-cased form from the **original** text so that
|
||||
// camelCase identifiers (`isSafeRemoteUrl`, `isAuthorized`,
|
||||
// `isValidUUID`) classify against the snake-cased prefix list
|
||||
// (`is_safe`, `is_authorized`, `is_authenticated`) the same as
|
||||
// `is_safe_remote_url` would. Required to recognise CVE-2026-33486
|
||||
// (roadiz/documents `isSafeRemoteUrl` SSRF sanitiser) as a
|
||||
// ValidationCall on the patched fixture. Mirrors the trim/strip
|
||||
// pipeline above on case-preserved text so the snake form lines up
|
||||
// with `bare`.
|
||||
let orig_trimmed = text.trim_start_matches(['(', '!', ' ', '\t']);
|
||||
let orig_trimmed = orig_trimmed
|
||||
.strip_prefix("not ")
|
||||
.unwrap_or(orig_trimmed)
|
||||
.trim();
|
||||
let orig_callee_part = orig_trimmed.split('(').next().unwrap_or("");
|
||||
let orig_bare = orig_callee_part
|
||||
.rsplit(['.', ':'])
|
||||
.next()
|
||||
.unwrap_or(orig_callee_part)
|
||||
.trim();
|
||||
let bare_snake = to_snake_lower(orig_bare);
|
||||
|
||||
// Validation
|
||||
if bare.contains("valid")
|
||||
|| bare.contains("check")
|
||||
|| bare.contains("verify")
|
||||
|| bare.starts_with("is_safe")
|
||||
|| bare.starts_with("is_authorized")
|
||||
|| bare.starts_with("is_authenticated")
|
||||
|| bare_snake.starts_with("is_safe")
|
||||
|| bare_snake.starts_with("is_authorized")
|
||||
|| bare_snake.starts_with("is_authenticated")
|
||||
{
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
|
|
@ -734,8 +791,12 @@ fn extract_validation_target(text: &str) -> Option<String> {
|
|||
// not corrupt the argument substring.
|
||||
let first_arg = first_call_arg(args_part)?;
|
||||
|
||||
// Strip reference operators (e.g. `&x` → `x`)
|
||||
// Strip reference operators (e.g. `&x` → `x`) and PHP variable sigil
|
||||
// (`$url` → `url`) so the extracted target lines up with the var-name
|
||||
// form used in branch-narrowing. Mirrors the `$` strip already done by
|
||||
// `extract_allowlist_target` for `in_array($cmd, $allowed)`.
|
||||
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
|
||||
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
|
||||
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
Some(first_arg.to_string())
|
||||
|
|
@ -991,6 +1052,63 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn classify_camelcase_safety_validators_are_validation_call() {
    // Every condition below must classify as `ValidationCall`:
    // - `self::isSafeRemoteUrl($url)` is the real-CVE shape from
    //   roadiz/documents (CVE-2026-33486); the lowercased bare name
    //   `issaferemoteurl` does not start with `is_safe`, so the match
    //   depends on the snake-cased form.
    // - `isAuthorized` / `isAuthenticated` cover the other camelCase
    //   prefixes.
    // - `isValidUUID` exercises acronym handling (`is_valid_uuid`, which
    //   contains "valid").
    // - `is_safe_remote_url` checks that snake_case input still matches.
    let conditions = [
        "self::isSafeRemoteUrl($url)",
        "isAuthorized(user)",
        "isAuthenticated(req)",
        "isValidUUID(id)",
        "is_safe_remote_url(x)",
    ];

    for condition in conditions {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::ValidationCall,
            "condition `{}` should classify as ValidationCall",
            condition,
        );
    }
}
|
||||
|
||||
#[test]
fn extract_validation_target_strips_php_dollar_sigil() {
    // The PHP `$` sigil must be dropped from the first call argument so the
    // extracted target is the bare variable name (`$url` → `url`). The
    // first case is the CVE-2026-33486 patched-fixture shape.
    let cases = [
        ("self::isSafeRemoteUrl($url)", "url"),
        ("validate($input)", "input"),
    ];

    for (call_text, expected_target) in cases {
        assert_eq!(
            extract_validation_target(call_text),
            Some(expected_target.to_string()),
            "target extracted from `{}`",
            call_text,
        );
    }
}
|
||||
|
||||
#[test]
fn to_snake_lower_handles_common_variants() {
    // (input, expected) pairs: camelCase, trailing acronym, leading acronym,
    // PascalCase, already-snake, all-lowercase, and the empty string.
    let cases = [
        ("isSafeRemoteUrl", "is_safe_remote_url"),
        ("isValidUUID", "is_valid_uuid"),
        ("HTTPClient", "http_client"),
        ("IsSafe", "is_safe"),
        ("is_safe", "is_safe"),
        ("validate", "validate"),
        ("", ""),
    ];

    for (input, expected) in cases {
        assert_eq!(to_snake_lower(input), expected, "input: `{}`", input);
    }
}
|
||||
|
||||
#[test]
|
||||
fn classify_validation_requires_paren() {
|
||||
// `x_valid == true` should NOT be ValidationCall, no `(` call syntax.
|
||||
|
|
|
|||
|
|
@ -1523,6 +1523,121 @@ fn apply_input_validator_branch_narrowing(
|
|||
}
|
||||
}
|
||||
|
||||
/// JS/TS Array-method validator-callback narrowing.
|
||||
///
|
||||
/// `arr.filter(isSafeIdentifier)`, `arr.find(isValidId)`, and the
|
||||
/// `findLast` variant are gating array methods whose return value is
|
||||
/// composed of elements that passed the callback. When the callback
|
||||
/// argument resolves to a name `classify_input_validator_callee` tags
|
||||
/// as `BooleanTrueIsValid` (`isValid…`, `isSafe…`, `hasValid…` and
|
||||
/// snake-case variants), every element of the result satisfies the
|
||||
/// validator, so the call's downstream sinks see the same flow as
|
||||
/// validated taint.
|
||||
///
|
||||
/// The companion `if (isValidX(x)) use(x)` narrowing already exists in
|
||||
/// [`apply_input_validator_branch_narrowing`]; this is the same idea
|
||||
/// lifted to the call site for filter/find chains so taint stops at
|
||||
/// the gate rather than leaking through subsequent
|
||||
/// `Array[index]`/template/sink reads.
|
||||
///
|
||||
/// Strict-additive: if the callback's name does not match the
|
||||
/// validator pattern (anonymous arrow, opaque identifier, etc.), the
|
||||
/// helper is a no-op and the existing default propagation runs
|
||||
/// unchanged.
|
||||
///
|
||||
/// Motivated by CVE-2026-42353 (i18next-http-middleware path
|
||||
/// traversal): the patched fix is `languages.filter(utils.isSafeIdentifier)`
|
||||
/// before forwarding `languages` into the backend connector, and the
|
||||
/// dual deferred TS-side gap CVE-2026-25544 (Payload sqli).
|
||||
fn try_array_method_validator_callback_narrowing(
|
||||
inst: &SsaInst,
|
||||
info: &NodeInfo,
|
||||
callee: &str,
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
return_bits: &mut Cap,
|
||||
return_origins: &mut SmallVec<[TaintOrigin; 2]>,
|
||||
state: &mut SsaTaintState,
|
||||
transfer: &SsaTaintTransfer,
|
||||
ssa: &SsaBody,
|
||||
) -> bool {
|
||||
if !matches!(transfer.lang, Lang::JavaScript | Lang::TypeScript) {
|
||||
return false;
|
||||
}
|
||||
// Method-call shape: callee text contains a `.` and the trailing
|
||||
// segment is one of the gating array methods. `findIndex` /
|
||||
// `every` / `some` return scalar shapes (index, boolean) rather
|
||||
// than a filtered collection so they are excluded — element-level
|
||||
// validation does not apply to a numeric/boolean result.
|
||||
let dot = match callee.rfind('.') {
|
||||
Some(p) => p,
|
||||
None => return false,
|
||||
};
|
||||
let method = &callee[dot + 1..];
|
||||
if !matches!(method, "filter" | "find" | "findLast") {
|
||||
return false;
|
||||
}
|
||||
// The first positional argument's callable name. Two channels:
|
||||
// 1. `info.arg_callees` — populated by `extract_arg_callees`
|
||||
// (`call_ident_of` walks call shapes inside the arg). Catches
|
||||
// `arr.filter(cb())` and dotted-callback shapes where the
|
||||
// tree-sitter node kind reaches `Kind::CallFn` or
|
||||
// `Kind::CallMethod`.
|
||||
// 2. SSA `value_defs[v].var_name` for the arg's first SSA value
|
||||
// — covers the bare-identifier shape (`arr.filter(cb)`)
|
||||
// where the AST node is a plain identifier and
|
||||
// `extract_arg_callees` pushes `None` because there is no
|
||||
// call to recurse into. This is the shape every patched
|
||||
// CVE fix uses, so it is the dominant source of validator
|
||||
// callbacks in real code.
|
||||
let arg0 = match args.first() {
|
||||
Some(a) => a,
|
||||
None => return false,
|
||||
};
|
||||
let cb_from_arg_callees = info.arg_callees.first().and_then(|s| s.as_deref());
|
||||
let cb_from_ssa = arg0.iter().find_map(|&v| {
|
||||
ssa.value_defs
|
||||
.get(v.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
});
|
||||
let cb_name = match cb_from_arg_callees.or(cb_from_ssa) {
|
||||
Some(n) => n,
|
||||
None => return false,
|
||||
};
|
||||
if crate::ssa::type_facts::classify_input_validator_callee(cb_name)
|
||||
!= Some(InputValidatorPolarity::BooleanTrueIsValid)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Strip every cap from the return value: the returned array (or
|
||||
// single found element) is composed exclusively of elements the
|
||||
// recognised validator approved. `Cap::all()` is the conservative
|
||||
// ceiling because the validator's body is opaque to this layer; a
|
||||
// future extension could narrow caps by inspecting the body's
|
||||
// rejection patterns.
|
||||
*return_bits = Cap::empty();
|
||||
return_origins.clear();
|
||||
|
||||
// Mark the result's var_name as validated, mirroring the
|
||||
// [`apply_input_validator_branch_narrowing`] insertion. Useful
|
||||
// for direct same-name reads of the rebound array (`arr =
|
||||
// arr.filter(p)` then `arr.length`) but does not propagate
|
||||
// through Assigns to differently-named bindings (`const lng =
|
||||
// arr[0]`); the `return_bits` strip above is what gates those
|
||||
// downstream flows.
|
||||
if let Some(name) = ssa
|
||||
.value_defs
|
||||
.get(inst.value.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
{
|
||||
if let Some(sym) = transfer.interner.get(name) {
|
||||
state.validated_must.insert(sym);
|
||||
state.validated_may.insert(sym);
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Find the latest reaching SSA definition for `var_name` at the end of
|
||||
/// `block`. Mirrors `crate::constraint::lower::resolve_single_var` but
|
||||
/// avoids the cross-module privacy leak: callers in this module need it
|
||||
|
|
@ -4081,6 +4196,24 @@ pub(super) fn transfer_inst(
|
|||
}
|
||||
}
|
||||
|
||||
// Receiver-side validator strip. Some method-call validators
|
||||
// raise on failure rather than transforming a return value,
|
||||
// so the canonical `Sanitizer` mechanism (which clears the
|
||||
// return) is the wrong shape. After the call returns, the
|
||||
// *receiver* (and any args carrying the same equivalence
|
||||
// class) is proven to satisfy the validated property. Strip
|
||||
// the registered cap from receiver+args here so that
|
||||
// `path.relative_to(base)` clears `Cap::FILE_IO` from
|
||||
// `path` for downstream uses. Motivated by CVE-2024-23334
|
||||
// (aiohttp StaticResource symlink-bypass): the patched code
|
||||
// calls `filepath.relative_to(self._directory)` inside a
|
||||
// try/except and serves `filepath` afterwards.
|
||||
if let Some(cap) =
|
||||
crate::labels::lookup_receiver_validator(transfer.lang.as_str(), callee)
|
||||
{
|
||||
strip_cap_from_call_args(args, receiver, state, cap);
|
||||
}
|
||||
|
||||
// Alias-aware sanitization: propagate through must-aliased field paths
|
||||
if !sanitizer_bits.is_empty() {
|
||||
if let Some(aliases) = transfer.base_aliases {
|
||||
|
|
@ -4444,6 +4577,28 @@ pub(super) fn transfer_inst(
|
|||
}
|
||||
}
|
||||
|
||||
// JS/TS array-method validator-callback narrowing. When a
|
||||
// call shape matches `<arr>.filter(<recognised-validator>)`
|
||||
// (or `find` / `findLast`), strip the caps that flowed into
|
||||
// `return_bits` from the receiver — the result holds only
|
||||
// elements the validator approved. Strict-additive: the
|
||||
// helper is a no-op when the callback name does not match
|
||||
// the BooleanTrueIsValid bucket, leaving the default
|
||||
// propagation result unchanged. See
|
||||
// [`try_array_method_validator_callback_narrowing`] for the
|
||||
// motivating CVE pair.
|
||||
try_array_method_validator_callback_narrowing(
|
||||
inst,
|
||||
info,
|
||||
callee,
|
||||
args,
|
||||
&mut return_bits,
|
||||
&mut return_origins,
|
||||
state,
|
||||
transfer,
|
||||
ssa,
|
||||
);
|
||||
|
||||
// Constructor cap narrowing: a `new X(...)` call returns an object
|
||||
// instance, not a string. Caps that name a string-shaped sink
|
||||
// pattern (path argument, format string, URL component, JSON
|
||||
|
|
|
|||
|
|
@ -6779,3 +6779,83 @@ const handler = (req, res) => {
|
|||
"expected taint flow via double-call chain rebinding; got 0 findings",
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test for CVE-2026-42353 (i18next-http-middleware).
///
/// The patched code passes a tainted array through
/// `arr.filter(isSafeIdentifier)` before using it. With
/// `try_array_method_validator_callback_narrowing` recognising that shape,
/// the subsequent `languages[0]` → template literal → `fs.readFileSync`
/// chain must not produce a finding. The callback is a bare identifier,
/// which is the form resolved through the SSA `var_name` channel rather
/// than `extract_arg_callees`.
#[test]
fn cve_2026_42353_filter_isvalid_callback_strips_taint() {
    let js_source = br#"
const fs = require('fs');
function isSafeIdentifier(v) {
    return typeof v === 'string' && v.indexOf('..') === -1 && v.indexOf('/') === -1;
}
function handler(req, res) {
    let languages = req.query.lng ? req.query.lng.split(' ') : [];
    languages = languages.filter(isSafeIdentifier);
    const lng = languages[0];
    const filename = `/locales/${lng}.json`;
    fs.readFileSync(filename);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js_source, "javascript", grammar);

    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    assert!(
        findings.is_empty(),
        "expected no taint flow when filtered through isSafeIdentifier; got {} findings",
        findings.len(),
    );
}
|
||||
|
||||
/// Negative counterpart of the CVE-2026-42353 filter regression.
///
/// The handler is the same except that the `filter` callback is
/// `pickFirst`, a name that is not a recognised validator. The path
/// traversal flow into `fs.readFileSync` must therefore still be reported:
/// the narrowing depends on the callback's identity and is not a blanket
/// kill on every `<arr>.filter(...)` call.
#[test]
fn cve_2026_42353_filter_without_validator_callback_preserves_taint() {
    let js_source = br#"
const fs = require('fs');
function pickFirst(v) { return true; }
function handler(req, res) {
    let languages = req.query.lng ? req.query.lng.split(' ') : [];
    languages = languages.filter(pickFirst);
    const lng = languages[0];
    const filename = `/locales/${lng}.json`;
    fs.readFileSync(filename);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js_source, "javascript", grammar);

    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    let flow_reported = !findings.is_empty();
    assert!(
        flow_reported,
        "expected taint flow via filter(pickFirst) — pickFirst is not a recognised validator and must not strip taint; got 0 findings",
    );
}
|
||||
|
|
|
|||
|
|
@ -544,6 +544,16 @@ pub struct AuthAnalysisConfig {
|
|||
/// not need an ownership check. Defaults are set per-language in
|
||||
/// `auth_analysis::config::build_auth_rules`.
|
||||
pub acl_tables: Vec<String>,
|
||||
/// Callee names that, when they appear as the chain root of a
|
||||
/// chained-call shape (`select(X).filter_by(...)`,
|
||||
/// `query(X).filter(...)`), anchor the trailing method as a DB
|
||||
/// query-builder operation. Used to override the chained-call
|
||||
/// suppression in `classify_sink_class` for SQLAlchemy / similar
|
||||
/// query-builder idioms whose first call returns an opaque builder
|
||||
/// object the type tracker cannot resolve. Defaults set per
|
||||
/// language in `auth_analysis::config::build_auth_rules`.
|
||||
#[serde(default)]
|
||||
pub db_query_builder_roots: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for AuthAnalysisConfig {
|
||||
|
|
@ -568,6 +578,7 @@ impl Default for AuthAnalysisConfig {
|
|||
outbound_network_receiver_prefixes: Vec::new(),
|
||||
cache_receiver_prefixes: Vec::new(),
|
||||
acl_tables: Vec::new(),
|
||||
db_query_builder_roots: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1158,6 +1169,10 @@ pub(crate) fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
user_lang_cfg.auth.cache_receiver_prefixes,
|
||||
);
|
||||
extend_dedup(&mut entry.auth.acl_tables, user_lang_cfg.auth.acl_tables);
|
||||
extend_dedup(
|
||||
&mut entry.auth.db_query_builder_roots,
|
||||
user_lang_cfg.auth.db_query_builder_roots,
|
||||
);
|
||||
}
|
||||
|
||||
default
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue