Performance and precision pass (#64)

This commit is contained in:
Eli Peter 2026-05-04 19:58:04 -04:00 committed by GitHub
parent c7c5e0f3a1
commit fb698d2c27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
97 changed files with 9932 additions and 517 deletions

View file

@ -60,6 +60,7 @@ pub mod checks;
pub mod config;
pub mod extract;
pub mod model;
pub mod router_facts;
pub mod sql_semantics;
use crate::commands::scan::Diag;
@ -102,21 +103,98 @@ pub fn run_auth_analysis(
if !rules.enabled {
return Vec::new();
}
let mut model = extract::extract_authorization_model(
// Resolve cross-file router-deps for the active file (Python only)
// before constructing the model, so the FlaskExtractor sees the
// full per-file dep map at extraction time. See `router_facts`
// module + `analyse_file_fused` for the wider pipeline.
let cross_file_router_deps =
resolve_cross_file_router_deps_for_file(lang, file_path, global_summaries);
let model = extract::extract_authorization_model(
lang,
cfg.framework_ctx.as_ref(),
tree,
source,
file_path,
&rules,
cross_file_router_deps.as_ref(),
);
run_auth_analysis_with_model(
model,
tree,
lang,
file_path,
&rules,
var_types,
global_summaries,
scan_root,
)
}
/// Resolve the cross-file router-deps map for the file at `file_path`
/// by consulting the supplied [`GlobalSummaries`].
///
/// Returns `None` when any of the following holds:
/// * `lang` is not `"python"`,
/// * the caller passed no `global_summaries`,
/// * `router_facts::module_id_for_path` yields no module id for
///   `file_path`,
/// * the resolved map is empty (no
///   `<parent>.include_router(<this_file>.<var>, ...)` edge matched
///   this module).
///
/// Otherwise returns the non-empty resolved map.
pub(crate) fn resolve_cross_file_router_deps_for_file(
    lang: &str,
    file_path: &Path,
    global_summaries: Option<&GlobalSummaries>,
) -> Option<HashMap<String, Vec<(model::CallSite, bool)>>> {
    // Guards run in the same order as the cheapest-first checks:
    // language, then summaries presence, then module-id derivation.
    if lang != "python" {
        return None;
    }
    let summaries = global_summaries?;
    let module_id = router_facts::module_id_for_path(file_path)?;

    // An empty map carries no information; collapse it to `None` so
    // callers only need a single "nothing to apply" check.
    let deps = summaries.resolve_cross_file_router_deps(&module_id);
    (!deps.is_empty()).then_some(deps)
}
/// Variant of [`run_auth_analysis`] that accepts a pre-built
/// [`model::AuthorizationModel`] instead of building one from the AST.
///
/// Lets callers that need both diagnostics AND
/// `(FuncKey, AuthCheckSummary)` per-file summaries (the fused pass-2
/// path in [`crate::ast::analyse_file_fused`]) construct the base
/// authorization model exactly once and route both consumers through
/// it. Pre-fix the fused path called
/// [`extract::extract_authorization_model`] twice per file (once via
/// [`run_auth_analysis`], once via [`extract_auth_summaries_by_key`]),
/// duplicating the AST walks for `collect_top_level_units` +
/// `build_function_unit_with_meta` + `collect_unit_state` + every
/// extractor's framework-detection scan. On the
/// `mattermost/server/channels/app` profile that double-extract
/// accounted for 35.3% of total wall-clock; sharing the base model
/// drops it to ~17.6%.
///
/// The mutations applied here (`apply_var_types_to_model`,
/// `apply_typed_bounded_params`, `apply_helper_lifting`,
/// `apply_caller_scope_propagation`) only affect diagnostic
/// emission — `extract_auth_summaries_from_model` reads the **base**
/// model, so callers must extract summaries before passing the model
/// in.
#[allow(clippy::too_many_arguments)]
pub fn run_auth_analysis_with_model(
mut model: model::AuthorizationModel,
tree: &Tree,
lang: &str,
file_path: &Path,
rules: &config::AuthAnalysisRules,
var_types: Option<&VarTypes>,
global_summaries: Option<&GlobalSummaries>,
scan_root: Option<&Path>,
) -> Vec<Diag> {
if !rules.enabled {
return Vec::new();
}
// Refine `SensitiveOperation::sink_class` using SSA-derived
// variable types. Runs only when the caller supplied `var_types`
// (skipped for slug-lookup / unit-test call sites).
if let Some(types) = var_types {
apply_var_types_to_model(&mut model, &rules, types);
apply_var_types_to_model(&mut model, rules, types);
apply_typed_bounded_params(&mut model, types);
}
@ -128,11 +206,16 @@ pub fn run_auth_analysis(
// (when provided) for cross-file helpers that live in other files.
apply_helper_lifting(&mut model, lang, file_path, scan_root, global_summaries);
// Phase 1 caller-scope IPA: propagate route-handler-level auth
// checks DOWN to callee helper units within the same file. See
// [`apply_caller_scope_propagation`] for the propagation rule.
apply_caller_scope_propagation(&mut model);
if model.routes.is_empty() && model.units.is_empty() {
return Vec::new();
}
checks::run_checks(&model, &rules)
checks::run_checks(&model, rules)
.into_iter()
.map(|finding| auth_finding_to_diag(&finding, tree, file_path))
.collect()
@ -167,8 +250,28 @@ pub fn extract_auth_summaries_by_key(
source,
file_path,
&rules,
None,
);
summaries_keyed_by_func(&model, lang, file_path, scan_root)
extract_auth_summaries_from_model(&model, lang, file_path, scan_root)
}
/// Variant of [`extract_auth_summaries_by_key`] that consumes a
/// pre-built [`model::AuthorizationModel`].
///
/// Designed for callers that also need to run the diagnostic pipeline
/// (which mutates the model via [`run_auth_analysis_with_model`]):
/// extract summaries first against the base model, then hand the same
/// model to the diag pipeline so the second
/// [`extract::extract_authorization_model`] AST walk per file is
/// avoided. See [`run_auth_analysis_with_model`] for the wider
/// rationale and measured saving.
///
/// This function only borrows `model` and forwards all four arguments
/// unchanged to `summaries_keyed_by_func`; it does no extraction or
/// model mutation of its own.
pub fn extract_auth_summaries_from_model(
model: &model::AuthorizationModel,
lang: &str,
file_path: &Path,
scan_root: Option<&Path>,
) -> Vec<(FuncKey, model::AuthCheckSummary)> {
// Pure delegation: the keyed-summary construction lives in the helper.
summaries_keyed_by_func(model, lang, file_path, scan_root)
}
/// Convert an already-built [`model::AuthorizationModel`] into a
@ -444,6 +547,203 @@ fn apply_helper_lifting(
}
}
/// Phase 1 caller-scope IPA: propagate route-handler-level auth checks
/// DOWN to callee helper units within the same file.
///
/// `apply_helper_lifting` walks UPWARD: a helper that internally
/// proves ownership / membership / etc. has its summary lifted onto
/// each call site in the caller. But the inverse direction —
/// route handler that authenticates via route-level decorator/
/// dependency, then delegates to a private helper that performs the
/// actual sink — is the dominant FP shape on FastAPI / Django / Flask
/// codebases (sentry, saleor, airflow): the helper has no inline
/// auth_checks of its own, so `check_ownership_gaps` flags every
/// `session.add(...)` / `Model.objects.filter(id=...)` it contains.
///
/// This pass closes that gap inside a single file. For each helper
/// unit, if **every** same-file caller (across the whole call graph)
/// is itself an authorized route handler (route-level non-Login auth
/// check) or has already been authorized via this same propagation
/// in a prior round, lift the caller's route-level checks onto the
/// helper. Iterated to a small fixpoint so transitive helper chains
/// `route → mid_helper → leaf_helper` are also covered.
///
/// Synthetic checks carry `is_route_level=true` so
/// `auth_check_covers_subject` short-circuits coverage for any
/// subject the helper sees, mirroring the in-handler decorator-lift
/// semantics established by [`extract::flask::inject_middleware_auth`].
///
/// **Soundness rule**: a helper's `unit_callers` list must be
/// non-empty AND every caller must be authorized. This refuses to
/// authorize:
/// * helpers with no in-file caller (dead code or external
/// entry-point — could be CLI, cron, test harness, …),
/// * helpers called from a mix of authorized routes and unauthorized
/// callers (the unauthorized path is the real FP attack surface),
/// * helpers called only from another un-lifted helper (no
/// evidence the upstream chain authenticates).
///
/// Cross-file caller-scope IPA — where the route handler lives in
/// file A and the helper in file B — is not yet implemented.
/// Requires plumbing per-file caller auth checks through
/// `GlobalSummaries`, not just the existing per-callee
/// `AuthCheckSummary`. See `deep_engine_fixes.md` for the deferred
/// follow-up.
fn apply_caller_scope_propagation(model: &mut model::AuthorizationModel) {
    use model::{AnalysisUnitKind, AuthCheck, AuthCheckKind};
    use std::collections::{HashMap, HashSet};

    // Upper bound on fixpoint sweeps over the caller table.
    const MAX_ROUNDS: usize = 4;

    // Last `.`-separated segment of a (possibly qualified) name.
    fn leaf_of(name: &str) -> &str {
        name.rsplit('.').next().unwrap_or(name)
    }

    let unit_count = model.units.len();

    // Step 1 — lift targets, keyed by leaf name. Route handlers are
    // never targets: they already carry their own route-level auth.
    // When several units share a leaf name the first one wins.
    let mut targets_by_leaf: HashMap<String, usize> = HashMap::new();
    for (idx, unit) in model.units.iter().enumerate() {
        if unit.kind == AnalysisUnitKind::RouteHandler {
            continue;
        }
        let leaf = match unit.name.as_deref() {
            Some(name) => leaf_of(name),
            None => continue,
        };
        if !leaf.is_empty() {
            targets_by_leaf.entry(leaf.to_string()).or_insert(idx);
        }
    }

    // Step 2 — same-file caller table, indexed by callee unit. Each
    // caller is recorded at most once per callee, self-calls are
    // dropped, and call sites that resolve to no in-file unit
    // (stdlib, framework, third-party) add no edge at all.
    let mut callers_of: Vec<Vec<usize>> = vec![Vec::new(); unit_count];
    for (caller_idx, unit) in model.units.iter().enumerate() {
        let mut recorded: HashSet<usize> = HashSet::new();
        for call in &unit.call_sites {
            let Some(&callee_idx) = targets_by_leaf.get(leaf_of(&call.name)) else {
                continue;
            };
            if callee_idx == caller_idx {
                continue;
            }
            if recorded.insert(callee_idx) {
                callers_of[callee_idx].push(caller_idx);
            }
        }
    }

    // Step 3 — seed the authorized set. A unit qualifies only when it
    // carries a route-level check of a kind other than `LoginGuard`,
    // `TokenExpiry` or `TokenRecipient`: login proves identity rather
    // than authority, and the token kinds alone do not justify
    // foreign-id mutations. Seeding on them would silently authorize
    // helpers reachable from a login-only route.
    let seeds_authorization = |kind: AuthCheckKind| match kind {
        AuthCheckKind::LoginGuard | AuthCheckKind::TokenExpiry | AuthCheckKind::TokenRecipient => {
            false
        }
        _ => true,
    };
    let mut authorized: HashSet<usize> = model
        .units
        .iter()
        .enumerate()
        .filter(|(_, unit)| {
            unit.auth_checks
                .iter()
                .any(|check| check.is_route_level && seeds_authorization(check.kind))
        })
        .map(|(idx, _)| idx)
        .collect();

    // Step 4 — snapshot, per unit, the route-level checks that travel
    // with a lift. This set is wider than the seed set on purpose:
    // `TokenExpiry` / `TokenRecipient` are carried along because
    // `check_token_override_without_validation` gates on them
    // separately from `has_prior_subject_auth`. `LoginGuard` stays
    // excluded as too weak for either downstream check.
    let own_route_checks: Vec<Vec<AuthCheck>> = model
        .units
        .iter()
        .map(|unit| {
            let mut kept: Vec<AuthCheck> = Vec::new();
            for check in &unit.auth_checks {
                if check.is_route_level && check.kind != AuthCheckKind::LoginGuard {
                    kept.push(check.clone());
                }
            }
            kept
        })
        .collect();

    // Step 5 — bounded fixpoint. Lifts are staged in `pending_lifts`
    // and written into the model only afterwards, so the snapshots
    // above stay valid for the whole loop. A helper authorized earlier
    // passes its staged checks on to its own callees through this
    // map, which is what makes `route → mid → leaf` chains work.
    let mut pending_lifts: HashMap<usize, Vec<AuthCheck>> = HashMap::new();
    for _round in 0..MAX_ROUNDS {
        let mut progressed = false;
        for (callee_idx, callers) in callers_of.iter().enumerate() {
            // Soundness rule: not yet authorized, at least one in-file
            // caller, and every such caller already authorized.
            let eligible = !authorized.contains(&callee_idx)
                && !callers.is_empty()
                && callers.iter().all(|caller| authorized.contains(caller));
            if !eligible {
                continue;
            }

            let mut inherited: Vec<AuthCheck> = Vec::new();
            for &caller_idx in callers {
                inherited.extend(own_route_checks[caller_idx].iter().cloned());
                if let Some(staged) = pending_lifts.get(&caller_idx) {
                    inherited.extend(staged.iter().cloned());
                }
            }
            if inherited.is_empty() {
                continue;
            }

            authorized.insert(callee_idx);
            pending_lifts.insert(callee_idx, inherited);
            progressed = true;
        }
        if !progressed {
            break;
        }
    }

    // Step 6 — materialise the staged lifts onto the callee units,
    // skipping any check whose (span, kind, callee) triple is already
    // present on the unit.
    for (callee_idx, checks) in pending_lifts {
        let unit = &mut model.units[callee_idx];
        let mut seen: HashSet<((usize, usize), AuthCheckKind, String)> = unit
            .auth_checks
            .iter()
            .map(|existing| (existing.span, existing.kind, existing.callee.clone()))
            .collect();
        for mut check in checks {
            // Re-anchor at the callee's start line so the
            // `check.line <= op.line` gate in `has_prior_subject_auth`
            // covers every operation inside the callee; left at the
            // caller's line, the check may sit after the callee's
            // sinks and gate none of them.
            check.line = unit.line;
            check.callee = format!("(caller-scope lift {})", check.callee);
            if seen.insert((check.span, check.kind, check.callee.clone())) {
                unit.auth_checks.push(check);
            }
        }
    }
}
/// Build a `name → AuthCheckSummary` map by walking each unit's auth
/// checks and recording, for every check subject whose value-ref name
/// matches a positional parameter name of the unit, that param index
@ -742,11 +1042,14 @@ fn auth_finding_to_diag(finding: &checks::AuthFinding, tree: &Tree, file_path: &
#[cfg(test)]
mod tests {
use super::{VarTypes, apply_var_types_to_model, receiver_root, sink_class_for_type};
use super::{
VarTypes, apply_caller_scope_propagation, apply_var_types_to_model, receiver_root,
sink_class_for_type,
};
use crate::auth_analysis::config::build_auth_rules;
use crate::auth_analysis::model::{
AnalysisUnit, AnalysisUnitKind, AuthorizationModel, OperationKind, SensitiveOperation,
SinkClass,
AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite,
OperationKind, SensitiveOperation, SinkClass,
};
use crate::ssa::type_facts::TypeKind;
use crate::utils::config::Config;
@ -868,6 +1171,239 @@ mod tests {
);
}
/// Test fixture: an [`AnalysisUnit`] of the given `kind` and `name`
/// whose `call_sites` contain one argument-less [`CallSite`] per entry
/// in `callees`. Every other collection starts empty and `line` is 1;
/// tests attach auth checks or operations themselves.
fn unit_with_calls(kind: AnalysisUnitKind, name: &str, callees: &[&str]) -> AnalysisUnit {
    let mut call_sites = Vec::with_capacity(callees.len());
    for callee in callees {
        call_sites.push(CallSite {
            name: (*callee).to_string(),
            args: Vec::new(),
            span: (0, 0),
            args_value_refs: Vec::new(),
        });
    }

    AnalysisUnit {
        kind,
        name: Some(name.into()),
        span: (0, 0),
        line: 1,
        call_sites,
        params: Vec::new(),
        context_inputs: Vec::new(),
        auth_checks: Vec::new(),
        operations: Vec::new(),
        value_refs: Vec::new(),
        condition_texts: Vec::new(),
        row_field_vars: HashMap::new(),
        var_alias_chain: HashMap::new(),
        row_population_data: HashMap::new(),
        typed_bounded_dto_fields: HashMap::new(),
        self_actor_vars: HashSet::new(),
        self_actor_id_vars: HashSet::new(),
        authorized_sql_vars: HashSet::new(),
        const_bound_vars: HashSet::new(),
        typed_bounded_vars: HashSet::new(),
        self_scoped_session_bases: HashSet::new(),
    }
}
/// Test fixture: a route-level [`AuthCheck`] of the given `kind`, with
/// a fixed callee text, span `(10, 11)`, line 1 and no subjects, args
/// or condition text.
fn route_level_check(kind: AuthCheckKind) -> AuthCheck {
    let callee = String::from("Security(require_auth)");
    AuthCheck {
        is_route_level: true,
        kind,
        callee,
        span: (10, 11),
        line: 1,
        subjects: Vec::new(),
        args: Vec::new(),
        condition_text: None,
    }
}
#[test]
fn caller_scope_propagation_lifts_route_level_other_to_callee_helper() {
    // Airflow-shaped fixture: the route handler `ti_update_state`
    // carries route-level Other / TokenExpiry / TokenRecipient checks
    // and delegates to the helper `_create_state_update`, which has no
    // checks of its own.
    let mut handler = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_update_state",
        &["_create_state_update"],
    );
    handler.auth_checks.extend(
        [
            AuthCheckKind::Other,
            AuthCheckKind::TokenExpiry,
            AuthCheckKind::TokenRecipient,
        ]
        .map(route_level_check),
    );

    let mut model = AuthorizationModel::default();
    model.units.push(handler);
    model.units.push(unit_with_calls(
        AnalysisUnitKind::Function,
        "_create_state_update",
        &[],
    ));

    apply_caller_scope_propagation(&mut model);

    // All three kinds must now be present on the helper, still marked
    // route-level and re-anchored to the helper's own start line.
    let helper_line = model.units[1].line;
    let lifted = &model.units[1].auth_checks;
    let has_kind = |wanted: AuthCheckKind| lifted.iter().any(|c| c.kind == wanted);

    assert!(
        has_kind(AuthCheckKind::Other),
        "helper should inherit Other check from caller"
    );
    assert!(
        has_kind(AuthCheckKind::TokenExpiry),
        "helper should inherit TokenExpiry check (needed for token_override suppression)"
    );
    assert!(
        has_kind(AuthCheckKind::TokenRecipient),
        "helper should inherit TokenRecipient check"
    );
    assert!(
        lifted.iter().all(|c| c.is_route_level),
        "lifted checks must keep is_route_level=true"
    );
    assert!(
        lifted.iter().all(|c| c.line == helper_line),
        "lifted check.line must match callee unit start so check.line <= op.line holds"
    );
}
#[test]
fn caller_scope_propagation_refuses_when_helper_has_unauthorized_caller() {
    // Two route handlers call the same helper; only one of them has a
    // route-level check. A single unauthorized caller must block the
    // lift, because that path is the real attack surface.
    let helper_name = "_create_state_update";

    let mut authed = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_update_state",
        &[helper_name],
    );
    authed
        .auth_checks
        .push(route_level_check(AuthCheckKind::Other));
    let bare = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_overwrite_state",
        &[helper_name],
    );
    let helper = unit_with_calls(AnalysisUnitKind::Function, helper_name, &[]);

    let mut model = AuthorizationModel::default();
    model.units.extend([authed, bare, helper]);

    apply_caller_scope_propagation(&mut model);

    assert!(
        model.units[2].auth_checks.is_empty(),
        "helper must not be authorized when one caller has no route-level auth"
    );
}
#[test]
fn caller_scope_propagation_refuses_when_helper_has_no_callers() {
    // A helper nobody in the file calls might be reached from a CLI,
    // a test, a cron job or an external import, so the pass must
    // leave it untouched.
    let mut model = AuthorizationModel::default();
    model.units.push(unit_with_calls(
        AnalysisUnitKind::Function,
        "_orphan_helper",
        &[],
    ));

    apply_caller_scope_propagation(&mut model);

    let orphan = &model.units[0];
    assert!(
        orphan.auth_checks.is_empty(),
        "helper with no in-file callers must not be authorized"
    );
}
#[test]
fn caller_scope_propagation_transitive_chain_route_to_mid_to_leaf() {
    // Chain under test: route → _mid_helper → _leaf_helper. The mid
    // helper is lifted from the route, and the leaf is lifted from the
    // checks staged for the mid helper. Because units are visited in
    // index order, both lifts can land within a single sweep here.
    let mut handler = unit_with_calls(
        AnalysisUnitKind::RouteHandler,
        "ti_update_state",
        &["_mid_helper"],
    );
    handler
        .auth_checks
        .push(route_level_check(AuthCheckKind::Other));
    let mid = unit_with_calls(AnalysisUnitKind::Function, "_mid_helper", &["_leaf_helper"]);
    let leaf = unit_with_calls(AnalysisUnitKind::Function, "_leaf_helper", &[]);

    let mut model = AuthorizationModel::default();
    model.units.extend([handler, mid, leaf]);

    apply_caller_scope_propagation(&mut model);

    let carries_other = |unit_idx: usize| {
        model.units[unit_idx]
            .auth_checks
            .iter()
            .any(|c| c.kind == AuthCheckKind::Other)
    };
    assert!(
        carries_other(1),
        "mid helper should be authorized in round 1"
    );
    assert!(
        carries_other(2),
        "leaf helper should be authorized in round 2 via the lifted mid"
    );
}
#[test]
fn caller_scope_propagation_does_not_seed_on_loginguard_only_route() {
    // The only route-level check on the handler is a LoginGuard, which
    // establishes identity but not authority, so nothing may be lifted
    // onto the helper it calls.
    let mut handler =
        unit_with_calls(AnalysisUnitKind::RouteHandler, "list_things", &["_helper"]);
    handler
        .auth_checks
        .push(route_level_check(AuthCheckKind::LoginGuard));

    let mut model = AuthorizationModel::default();
    model.units.push(handler);
    model
        .units
        .push(unit_with_calls(AnalysisUnitKind::Function, "_helper", &[]));

    apply_caller_scope_propagation(&mut model);

    assert!(
        model.units[1].auth_checks.is_empty(),
        "LoginGuard alone must not seed the helper"
    );
}
#[test]
fn caller_scope_propagation_skips_self_recursive_call() {
    // A helper whose only call site targets itself: the self-edge is
    // dropped when the caller table is built, so the helper ends up
    // with no in-file callers and must stay unauthorized.
    let mut model = AuthorizationModel::default();
    model.units.push(unit_with_calls(
        AnalysisUnitKind::Function,
        "recurse",
        &["recurse"],
    ));

    apply_caller_scope_propagation(&mut model);

    let recursive = &model.units[0];
    assert!(
        recursive.auth_checks.is_empty(),
        "self-recursive helper with no other callers must not be authorized"
    );
}
#[test]
fn apply_var_types_leaves_classification_untouched_when_receiver_unknown() {
let cfg = Config::default();