mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Authorization analysis logic improvements (#61)
This commit is contained in:
parent
3c89bddbf2
commit
40995e45e7
55 changed files with 4193 additions and 134 deletions
|
|
@ -1102,6 +1102,7 @@ impl<'a> ParsedFile<'a> {
|
|||
if !missing.is_empty() {
|
||||
let aug_ctx = crate::utils::project::FrameworkContext {
|
||||
frameworks: missing.clone(),
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
lang_rules
|
||||
.extra_labels
|
||||
|
|
|
|||
|
|
@ -15,11 +15,14 @@ pub struct AuthFinding {
|
|||
|
||||
pub fn run_checks(model: &AuthorizationModel, rules: &AuthAnalysisRules) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
let web_signal = model.lang_web_framework_signal;
|
||||
findings.extend(check_admin_routes(model, rules));
|
||||
findings.extend(check_ownership_gaps(model, rules));
|
||||
findings.extend(check_partial_batch_authorization(model, rules));
|
||||
findings.extend(check_stale_authorization(model, rules));
|
||||
findings.extend(check_token_override_without_validation(model, rules));
|
||||
findings.extend(check_ownership_gaps(model, rules, web_signal));
|
||||
findings.extend(check_partial_batch_authorization(model, rules, web_signal));
|
||||
findings.extend(check_stale_authorization(model, rules, web_signal));
|
||||
findings.extend(check_token_override_without_validation(
|
||||
model, rules, web_signal,
|
||||
));
|
||||
findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
|
||||
findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
|
||||
findings
|
||||
|
|
@ -63,11 +66,15 @@ fn check_admin_routes(model: &AuthorizationModel, rules: &AuthAnalysisRules) ->
|
|||
findings
|
||||
}
|
||||
|
||||
fn check_ownership_gaps(model: &AuthorizationModel, rules: &AuthAnalysisRules) -> Vec<AuthFinding> {
|
||||
fn check_ownership_gaps(
|
||||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
continue;
|
||||
}
|
||||
for op in &unit.operations {
|
||||
|
|
@ -115,11 +122,12 @@ fn check_ownership_gaps(model: &AuthorizationModel, rules: &AuthAnalysisRules) -
|
|||
fn check_partial_batch_authorization(
|
||||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
continue;
|
||||
}
|
||||
for op in &unit.operations {
|
||||
|
|
@ -169,11 +177,12 @@ fn check_partial_batch_authorization(
|
|||
fn check_stale_authorization(
|
||||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for unit in &model.units {
|
||||
if !unit_has_user_input_evidence(unit) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
continue;
|
||||
}
|
||||
for op in unit.operations.iter().filter(|operation| {
|
||||
|
|
@ -216,6 +225,7 @@ fn check_stale_authorization(
|
|||
fn check_token_override_without_validation(
|
||||
model: &AuthorizationModel,
|
||||
rules: &AuthAnalysisRules,
|
||||
web_signal: Option<bool>,
|
||||
) -> Vec<AuthFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
|
|
@ -229,7 +239,7 @@ fn check_token_override_without_validation(
|
|||
// call shape happens to look token-y (`account.token = …;
|
||||
// account.save()`). Gate on positive user-input evidence so
|
||||
// these pure backend units are never claimed as a token flow.
|
||||
if !unit_has_user_input_evidence(unit) {
|
||||
if !unit_has_user_input_evidence(unit, web_signal) {
|
||||
continue;
|
||||
}
|
||||
let Some(token_lookup) = unit
|
||||
|
|
@ -600,6 +610,82 @@ fn is_relevant_target_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
|
|||
&& !is_actor_context_subject(subject, unit)
|
||||
&& !is_const_bound_subject(subject, unit)
|
||||
&& !is_typed_bounded_subject(subject, unit)
|
||||
&& !is_caller_scope_entity_subject(subject, unit)
|
||||
}
|
||||
|
||||
/// True iff `subject` is a member-access of form `<entity>.id` /
|
||||
/// `<entity>.pk` whose root identifier is a unit parameter named after
|
||||
/// a scope-bearing domain entity (`organization`, `project`, `team`,
|
||||
/// `workspace`, `tenant`, `account`, `community`, `repository`, …).
|
||||
///
|
||||
/// Such subjects are the *scope* of the operation — the ownership
|
||||
/// constraint the caller passed in — not a user-controlled target.
|
||||
/// Helpers like
|
||||
/// `def get_environments(request, organization: Organization): …
|
||||
/// Environment.objects.filter(organization_id=organization.id, …)`
|
||||
/// inherit the caller's authorization on the entity object; the call
|
||||
/// itself enforces tenant scoping. Without this exemption, every
|
||||
/// internal helper in a multi-tenant Django/Rails/Laravel codebase
|
||||
/// flags `missing_ownership_check` because the engine cannot tell
|
||||
/// "scoping arg" from "user-targeted arg".
|
||||
///
|
||||
/// Conservative scope:
|
||||
/// * Field must be `id` or `pk` (the canonical primary-key fields).
|
||||
/// `entity.name` / `entity.slug` are deliberately excluded — those
|
||||
/// could be user-supplied display strings even on a typed entity.
|
||||
/// * Root must be exactly a unit parameter (not a derived local).
|
||||
/// * Root name must be in the scope-entity vocabulary. Names like
|
||||
/// `user`, `member`, `actor` are deliberately omitted: those carry
|
||||
/// actor semantics and are handled separately by
|
||||
/// `is_actor_context_subject`.
|
||||
fn is_caller_scope_entity_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
|
||||
let Some(field) = subject.field.as_deref() else {
|
||||
return false;
|
||||
};
|
||||
let field_lower = field.to_ascii_lowercase();
|
||||
if !matches!(field_lower.as_str(), "id" | "pk") {
|
||||
return false;
|
||||
}
|
||||
let Some(base) = subject.base.as_deref() else {
|
||||
return false;
|
||||
};
|
||||
let root = base.split('.').next().unwrap_or(base);
|
||||
if !is_caller_scope_entity_name(root) {
|
||||
return false;
|
||||
}
|
||||
unit.params.iter().any(|p| p == root)
|
||||
}
|
||||
|
||||
/// Recognises parameter names that conventionally carry a *scope*
/// entity — the multi-tenant ownership boundary inherited from the
/// caller — rather than a user-controlled target identifier.
///
/// The vocabulary covers common multi-tenant primitives in both long
/// and short forms (`organization` / `org`, `repository` / `repo`).
/// Actor-like names (`user`, `member`, `actor`) are intentionally
/// absent; they are covered by `is_actor_context_subject`.
///
/// Matching is an exact, ASCII case-insensitive comparison of the whole
/// name: `"ORG"` matches, `"org_id"` and `"organizations"` do not.
/// The comparison is allocation-free — no lowercase copy of `name` is
/// built.
fn is_caller_scope_entity_name(name: &str) -> bool {
    // Closed vocabulary; widening it should be a deliberate change.
    const SCOPE_ENTITY_NAMES: [&str; 12] = [
        "organization",
        "org",
        "project",
        "team",
        "workspace",
        "tenant",
        "account",
        "community",
        "group",
        "repository",
        "repo",
        "company",
    ];

    SCOPE_ENTITY_NAMES
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
|
||||
|
||||
/// True iff `subject` is a plain identifier whose declaration binds
|
||||
|
|
@ -852,10 +938,25 @@ fn is_id_like_name(name: &str) -> bool {
|
|||
/// pure utility helpers fail all three conditions and are skipped;
|
||||
/// they cannot, by construction, be the entry point of an
|
||||
/// authentication-bearing flow.
|
||||
fn unit_has_user_input_evidence(unit: &AnalysisUnit) -> bool {
|
||||
fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -> bool {
|
||||
if unit.kind == AnalysisUnitKind::RouteHandler {
|
||||
return true;
|
||||
}
|
||||
// Project-level web-framework gate. When the project's manifest
|
||||
// was inspected and named no web framework matching the file's
|
||||
// language, AND no per-file import override applied, the file
|
||||
// lives in a project with no HTTP boundary. Step 2 (context
|
||||
// inputs) and step 3 (param-name heuristic) are both name-shape
|
||||
// heuristics that overshoot in non-web Rust crates ─ e.g. zed's
|
||||
// GUI test code where `session.update(cx, ...)` (a debug-session
|
||||
// handle, not an auth session) trips `matches_session_context`
|
||||
// and lands in `context_inputs`, opening every test method's
|
||||
// sinks. Refuse here, after the RouteHandler step (which is
|
||||
// determined by framework extractors and is robust evidence on
|
||||
// its own).
|
||||
if web_signal == Some(false) {
|
||||
return false;
|
||||
}
|
||||
if !unit.context_inputs.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -934,8 +1035,9 @@ fn is_batch_collection(subject: &ValueRef) -> bool {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
auth_check_covers_subject, is_actor_context_subject, is_external_input_param_name,
|
||||
is_relevant_target_subject, unit_has_user_input_evidence,
|
||||
auth_check_covers_subject, is_actor_context_subject, is_caller_scope_entity_name,
|
||||
is_caller_scope_entity_subject, is_external_input_param_name, is_relevant_target_subject,
|
||||
unit_has_user_input_evidence,
|
||||
};
|
||||
use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind, ValueRef, ValueSourceKind};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
|
@ -1083,6 +1185,146 @@ mod tests {
|
|||
assert!(is_relevant_target_subject(&member("req", "id"), &unit));
|
||||
}
|
||||
|
||||
/// Regression drawn from real repositories: a scope entity passed in
/// by the caller and used as the ownership constraint (sentry
/// `api/helpers/environments.py` `get_environments(request, organization)`
/// and `api/endpoints/organization_releases.py`
/// `_filter_releases_by_query(queryset, organization, query, ...)`).
/// The helper relies on the caller's authorization of the entity
/// object, so `<entity>.id` is the ownership scope and not a target.
#[test]
fn caller_scope_entity_subject_recognises_unit_param_id() {
    let mut org_unit = empty_unit();
    org_unit.params.push("organization".into());

    // Both primary-key spellings on a scope-entity parameter are
    // recognised as scope.
    for field in ["id", "pk"] {
        assert!(
            is_caller_scope_entity_subject(&member("organization", field), &org_unit),
            "organization.{field} should be recognised as caller scope"
        );
    }

    // The exemption must propagate into `is_relevant_target_subject`.
    assert!(!is_relevant_target_subject(
        &member("organization", "id"),
        &org_unit
    ));

    // A sample of the remaining vocabulary, each on its own unit whose
    // only parameter is the entity itself.
    for entity in ["project", "team", "workspace", "repo"] {
        let mut scoped_unit = empty_unit();
        scoped_unit.params.push(entity.into());
        assert!(
            is_caller_scope_entity_subject(&member(entity, "id"), &scoped_unit),
            "{entity}.id should be recognised as caller scope"
        );
    }
}
|
||||
|
||||
/// Negative cases for `is_caller_scope_entity_subject`: shapes that
/// look similar to a scope-entity access but must stay flaggable.
#[test]
fn caller_scope_entity_subject_does_not_overreach() {
    // The entity name is not among the unit's parameters -> no exemption.
    let paramless_unit = empty_unit();
    assert!(!is_caller_scope_entity_subject(
        &member("organization", "id"),
        &paramless_unit
    ));

    let mut org_unit = empty_unit();
    org_unit.params.push("organization".into());

    // Non-primary-key fields may be user-supplied display strings.
    for field in ["name", "slug"] {
        assert!(
            !is_caller_scope_entity_subject(&member("organization", field), &org_unit),
            "organization.{field} must not be treated as caller scope"
        );
    }

    // Actor-flavoured parameters are deliberately outside the scope
    // vocabulary (they belong to `is_actor_context_subject`) and must
    // not be widened here.
    for actor in ["user", "member"] {
        let mut actor_unit = empty_unit();
        actor_unit.params.push(actor.into());
        assert!(
            !is_caller_scope_entity_subject(&member(actor, "id"), &actor_unit),
            "{actor}.id must not be treated as caller scope"
        );
    }

    // A bare identifier carries no field, so it cannot qualify even
    // when it names a scope-entity parameter.
    assert!(!is_caller_scope_entity_subject(
        &plain("organization"),
        &org_unit
    ));
}
|
||||
|
||||
/// Pins the vocabulary accepted by `is_caller_scope_entity_name` so
/// that any future widening has to be made on purpose.
#[test]
fn caller_scope_entity_name_vocabulary() {
    // Names that must be accepted, including mixed-case spellings.
    const ACCEPTED: [&str; 13] = [
        "organization",
        "Organization",
        "ORG",
        "project",
        "team",
        "workspace",
        "tenant",
        "account",
        "community",
        "group",
        "repository",
        "repo",
        "company",
    ];
    // Names that must be rejected: actor semantics or generic handles.
    const REJECTED: [&str; 6] = ["user", "member", "actor", "request", "self", "ctx"];

    for name in ACCEPTED {
        assert!(
            is_caller_scope_entity_name(name),
            "expected {name} to be recognised as scope entity"
        );
    }
    for name in REJECTED {
        assert!(
            !is_caller_scope_entity_name(name),
            "expected {name} NOT to be recognised as scope entity"
        );
    }
}
|
||||
|
||||
/// Hierarchy: a parameter whose
|
||||
/// static type was recovered as `Int`/`Bool` (Spring `Long userId`,
|
||||
/// Axum `Path<i64>`, FastAPI `user_id: int`) has its name added to
|
||||
|
|
@ -1119,23 +1361,23 @@ mod tests {
|
|||
// Function with no params and no context_inputs (Celery task
|
||||
// shape), must NOT count as user-input-bearing.
|
||||
let mut unit = empty_unit();
|
||||
assert!(!unit_has_user_input_evidence(&unit));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
|
||||
// Adding internal-typed params (apps, schema_editor, Django
|
||||
// migration RunPython callback shape) keeps the gate closed.
|
||||
unit.params.push("apps".into());
|
||||
unit.params.push("schema_editor".into());
|
||||
assert!(!unit_has_user_input_evidence(&unit));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
|
||||
// pytest hook shape: (config, items), gate stays closed.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("config".into());
|
||||
unit.params.push("items".into());
|
||||
assert!(!unit_has_user_input_evidence(&unit));
|
||||
assert!(!unit_has_user_input_evidence(&unit, None));
|
||||
|
||||
// Adding an id-like param flips the gate open.
|
||||
unit.params.push("doc_id".into());
|
||||
assert!(unit_has_user_input_evidence(&unit));
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
|
||||
// Token-named param flips the gate open (Express helper
|
||||
// `acceptInvitation(token, currentUser, roleOverride)`).
|
||||
|
|
@ -1143,23 +1385,72 @@ mod tests {
|
|||
unit.params.push("token".into());
|
||||
unit.params.push("currentUser".into());
|
||||
unit.params.push("roleOverride".into());
|
||||
assert!(unit_has_user_input_evidence(&unit));
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
|
||||
// Framework request-name param flips the gate open
|
||||
// (Django/Flask `def view(request, project_id):`).
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("request".into());
|
||||
assert!(unit_has_user_input_evidence(&unit));
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
|
||||
// Axum/Actix typed-extractor convention name flips it open.
|
||||
let mut unit = empty_unit();
|
||||
unit.params.push("path".into());
|
||||
assert!(unit_has_user_input_evidence(&unit));
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
|
||||
// RouteHandler kind always wins, regardless of params.
|
||||
let mut unit = empty_unit();
|
||||
unit.kind = AnalysisUnitKind::RouteHandler;
|
||||
assert!(unit_has_user_input_evidence(&unit));
|
||||
assert!(unit_has_user_input_evidence(&unit, None));
|
||||
}
|
||||
|
||||
/// A web-framework signal of `Some(false)` — the project manifest was
/// inspected, named no web framework for the file's language, and no
/// per-file import override applied — must close both the
/// `context_inputs` arm and the param-name arm of
/// `unit_has_user_input_evidence`. Both arms are name-shape heuristics
/// that overshoot in non-web Rust crates (for example a debug-session
/// handle called `session` ends up in `context_inputs`). Only the
/// `RouteHandler` classification survives, because it is set by
/// framework extractors from concrete route-registration evidence.
#[test]
fn web_framework_signal_gates_user_input_heuristics() {
    // Gate verdicts for the three possible signals, in the order
    // "unknown", "web project", "confirmed non-web project".
    let verdicts = |unit: &AnalysisUnit| {
        [None, Some(true), Some(false)].map(|signal| unit_has_user_input_evidence(unit, signal))
    };

    // Param-name arm: an id-shaped parameter opens the gate unless the
    // project is known to have no web framework.
    let mut by_param = empty_unit();
    by_param.params.push("session_id".into());
    assert_eq!(verdicts(&by_param), [true, true, false]);

    // RouteHandler classification overrides the project-level gate,
    // even when the manifest does not name the framework.
    by_param.kind = AnalysisUnitKind::RouteHandler;
    assert!(unit_has_user_input_evidence(&by_param, Some(false)));

    // context_inputs arm: a `session.update` access on a non-auth
    // session handle is recorded as a session context input; the gate
    // must suppress it for confirmed non-web projects.
    let mut by_context = empty_unit();
    by_context.context_inputs.push(ValueRef {
        source_kind: ValueSourceKind::Session,
        name: "session.update".into(),
        base: Some("session".into()),
        field: Some("update".into()),
        index: None,
        span: (0, 0),
    });
    assert_eq!(verdicts(&by_context), [true, true, false]);
}
|
||||
|
||||
/// `is_external_input_param_name` covers id-, token-, and
|
||||
|
|
|
|||
|
|
@ -9,6 +9,17 @@ pub struct AuthAnalysisRules {
|
|||
pub admin_path_patterns: Vec<String>,
|
||||
pub admin_guard_names: Vec<String>,
|
||||
pub login_guard_names: Vec<String>,
|
||||
/// Typed-extractor wrapper names that carry route-level
|
||||
/// authorization (capability/policy enforcement) rather than mere
|
||||
/// authentication. Match by `matches_name` (last-segment +
|
||||
/// case-insensitive `starts_with`), so a single pattern like
|
||||
/// `"Guarded"` covers `Guarded`, `GuardedData`, `GuardedRoute`.
|
||||
/// Consulted only by `inject_guard_checks` for typed-extractor
|
||||
/// route-level injection — distinct from `login_guard_names` /
|
||||
/// `admin_guard_names` so the pattern doesn't pollute regular call
|
||||
/// recognition (where a function like `guarded_load(..)` would
|
||||
/// otherwise be wrongly classified as a login guard).
|
||||
pub policy_guard_names: Vec<String>,
|
||||
pub authorization_check_names: Vec<String>,
|
||||
pub mutation_indicator_names: Vec<String>,
|
||||
pub read_indicator_names: Vec<String>,
|
||||
|
|
@ -54,6 +65,7 @@ impl AuthAnalysisRules {
|
|||
admin_path_patterns: Vec::new(),
|
||||
admin_guard_names: Vec::new(),
|
||||
login_guard_names: Vec::new(),
|
||||
policy_guard_names: Vec::new(),
|
||||
authorization_check_names: Vec::new(),
|
||||
mutation_indicator_names: Vec::new(),
|
||||
read_indicator_names: Vec::new(),
|
||||
|
|
@ -353,6 +365,19 @@ impl AuthAnalysisRules {
|
|||
.any(|pattern| matches_name(name, pattern))
|
||||
}
|
||||
|
||||
/// Typed-extractor wrapper that proves the request passed a
|
||||
/// route-level capability/policy check (e.g. meilisearch's
|
||||
/// `GuardedData<ActionPolicy<X>, _>`). Distinct from
|
||||
/// `is_login_guard` because policy enforcement is more than mere
|
||||
/// authentication, it includes the per-action permission decision
|
||||
/// the Policy term encodes. Used only by `inject_guard_checks`
|
||||
/// for typed-extractor route-level injection.
|
||||
pub fn is_policy_guard(&self, name: &str) -> bool {
|
||||
self.policy_guard_names
|
||||
.iter()
|
||||
.any(|pattern| matches_name(name, pattern))
|
||||
}
|
||||
|
||||
pub fn is_authorization_check(&self, name: &str) -> bool {
|
||||
if self
|
||||
.authorization_check_names
|
||||
|
|
@ -482,6 +507,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"ensure_authenticated".into(),
|
||||
"require_auth".into(),
|
||||
],
|
||||
policy_guard_names: Vec::new(),
|
||||
authorization_check_names: vec![
|
||||
"check_membership".into(),
|
||||
"has_membership".into(),
|
||||
|
|
@ -595,6 +621,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"login_required".into(),
|
||||
"login_required!".into(),
|
||||
],
|
||||
policy_guard_names: Vec::new(),
|
||||
authorization_check_names: vec![
|
||||
"authorize".into(),
|
||||
"authorize!".into(),
|
||||
|
|
@ -762,6 +789,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"requireAuth".into(),
|
||||
"ensureAuthenticated".into(),
|
||||
],
|
||||
policy_guard_names: Vec::new(),
|
||||
authorization_check_names: vec![
|
||||
"CheckMembership".into(),
|
||||
"HasMembership".into(),
|
||||
|
|
@ -853,6 +881,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"Authenticated".into(),
|
||||
"isAuthenticated".into(),
|
||||
],
|
||||
policy_guard_names: Vec::new(),
|
||||
authorization_check_names: vec![
|
||||
"checkMembership".into(),
|
||||
"hasMembership".into(),
|
||||
|
|
@ -951,6 +980,14 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"RequireLogin".into(),
|
||||
"RequireAuth".into(),
|
||||
],
|
||||
// `Guarded` (case-insensitive starts_with) recognises
|
||||
// typed-extractor wrappers like meilisearch's
|
||||
// `GuardedData<ActionPolicy<{ actions::KEYS_GET }>, _>` as
|
||||
// route-level policy guards (capability enforcement). The
|
||||
// wrapper proves the request passed a permission check, so
|
||||
// any sink in the handler is route-gated even when the
|
||||
// engine cannot model the inner Policy term.
|
||||
policy_guard_names: vec!["Guarded".into()],
|
||||
authorization_check_names: vec![
|
||||
"check_membership".into(),
|
||||
"has_membership".into(),
|
||||
|
|
@ -1120,6 +1157,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
"ensureAuth".into(),
|
||||
"require_login".into(),
|
||||
],
|
||||
policy_guard_names: Vec::new(),
|
||||
authorization_check_names: vec![
|
||||
"checkMembership".into(),
|
||||
"hasWorkspaceMembership".into(),
|
||||
|
|
@ -1272,6 +1310,10 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
|
|||
&mut rules.login_guard_names,
|
||||
&lang_cfg.auth.login_guard_names,
|
||||
);
|
||||
extend_unique(
|
||||
&mut rules.policy_guard_names,
|
||||
&lang_cfg.auth.policy_guard_names,
|
||||
);
|
||||
extend_unique(
|
||||
&mut rules.authorization_check_names,
|
||||
&lang_cfg.auth.authorization_check_names,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use super::AuthExtractor;
|
||||
use super::axum::{
|
||||
GuardFramework, apply_aliases, dedup_call_sites, expanded_guard_call_sites,
|
||||
guard_calls_for_handler, inject_guard_checks, rust_param_aliases,
|
||||
GuardFramework, apply_aliases, apply_typed_extractor_guards_to_units, dedup_call_sites,
|
||||
expanded_guard_call_sites, guard_calls_for_handler, inject_guard_checks, rust_param_aliases,
|
||||
};
|
||||
use super::common::{
|
||||
attach_route_handler, call_name, collect_top_level_units, named_children, resolve_handler_node,
|
||||
|
|
@ -36,6 +36,13 @@ impl AuthExtractor for ActixWebExtractor {
|
|||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
collect_routes(root, root, bytes, path, rules, &mut model);
|
||||
apply_typed_extractor_guards_to_units(
|
||||
root,
|
||||
bytes,
|
||||
rules,
|
||||
&mut model,
|
||||
GuardFramework::ActixWeb,
|
||||
);
|
||||
|
||||
model
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ impl AuthExtractor for AxumExtractor {
|
|||
|
||||
collect_top_level_units(root, bytes, rules, &mut model);
|
||||
collect_routes(root, root, bytes, path, rules, &mut model);
|
||||
apply_typed_extractor_guards_to_units(root, bytes, rules, &mut model, GuardFramework::Axum);
|
||||
|
||||
model
|
||||
}
|
||||
|
|
@ -391,7 +392,61 @@ fn classify_rocket_param(
|
|||
/// non-route functions, and a false positive there suppresses
|
||||
/// downstream `V.id` flagging entirely; that path uses a structural
|
||||
/// recogniser keyed on the `<PREFIX>User<SUFFIX>?` shape.
|
||||
///
|
||||
/// Recognition is **outer-wrapper based**: classify by the outermost
|
||||
/// type name only, not by substring-anywhere on the whole text. This
|
||||
/// avoids both directions of leakage:
|
||||
/// * A bare data-only extractor like `web::Path<u64>` early-returns
|
||||
/// `None` regardless of inner type tokens (preserves existing
|
||||
/// behaviour).
|
||||
/// * A policy-bearing wrapper like
|
||||
/// `GuardedData<ActionPolicy<X>, Data<AuthController>>` is
|
||||
/// classified by the outer `GuardedData`, not by whether the inner
|
||||
/// `Data<AuthController>` happens to lowercase-contain "auth". The
|
||||
/// wrapper proves capability enforcement → `AuthCheckKind::Other`
|
||||
/// (the route-level short-circuit in `auth_check_covers_subject`
|
||||
/// suppresses missing-ownership-check for non-LoginGuard kinds).
|
||||
fn classify_guard_type(type_text: &str) -> Option<AuthCheckKind> {
|
||||
let outer = outermost_type_name(type_text);
|
||||
let outer_lower = outer.to_ascii_lowercase();
|
||||
|
||||
// Bare data-only extractors are *not* auth-bearing regardless of
|
||||
// their inner generic args. Outer-name match (case-insensitive
|
||||
// exact) — `Path<u64>` / `web::Path<...>` / `Query<X>` /
|
||||
// `Json<X>` / `Form<X>` / `State<X>` / `Extension<X>` /
|
||||
// `Data<X>`.
|
||||
if is_data_only_extractor_outer(&outer_lower) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Policy/guard-bearing outer wrapper. Names containing
|
||||
// `guard` (e.g. `GuardedData`, `GuardedRoute`) signal the
|
||||
// wrapper enforced a capability/permission check at request
|
||||
// construction. Distinct from `LoginGuard` because Policy
|
||||
// enforcement is more than authentication, it's authorization.
|
||||
if outer_lower.contains("guarded") || outer_lower.contains("guard") {
|
||||
if outer_lower.contains("admin") {
|
||||
return Some(AuthCheckKind::AdminGuard);
|
||||
}
|
||||
return Some(AuthCheckKind::Other);
|
||||
}
|
||||
|
||||
if outer_lower.contains("admin") {
|
||||
return Some(AuthCheckKind::AdminGuard);
|
||||
}
|
||||
if outer_lower.contains("user")
|
||||
|| outer_lower.contains("auth")
|
||||
|| outer_lower.contains("session")
|
||||
|| outer_lower.contains("identity")
|
||||
|| outer_lower.contains("principal")
|
||||
{
|
||||
return Some(AuthCheckKind::LoginGuard);
|
||||
}
|
||||
|
||||
// Backwards-compat fallback: legacy whole-text substring check
|
||||
// for unusual shapes whose outer wrapper is generic but whose
|
||||
// qualified path still mentions an auth token. Preserves
|
||||
// pre-2026-05-02 behaviour for non-Guarded wrappers.
|
||||
let lower = type_text.to_ascii_lowercase();
|
||||
if is_extractor_wrapper(&lower) {
|
||||
return None;
|
||||
|
|
@ -409,6 +464,49 @@ fn classify_guard_type(type_text: &str) -> Option<AuthCheckKind> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Extracts the outermost type name from the textual form of a type.
///
/// Processing steps:
/// 1. Trim surrounding whitespace.
/// 2. Repeatedly strip leading reference markers: `&`, the `mut`
///    keyword, and a lifetime token such as `'a`. The keyword and the
///    lifetime may be followed by *any* whitespace (space, tab,
///    newline), so hand-wrapped signatures are handled too.
/// 3. Keep the text before the first `<` (drops generic arguments).
/// 4. Keep the segment after the last `::` (drops the module path).
///
/// Examples: `web::Path<u64>` → `Path`,
/// `&'a mut std::collections::HashMap<K, V>` → `HashMap`.
///
/// Returns the empty string when nothing precedes the first `<`
/// (e.g. `<T as Trait>::Assoc`) or when the input is blank. The
/// returned slice borrows from `type_text`.
fn outermost_type_name(type_text: &str) -> &str {
    let mut rest = type_text.trim();

    // Strip reference decoration until a full pass changes nothing.
    loop {
        let before = rest;

        rest = rest.trim_start_matches('&').trim_start();

        // `mut` counts as the keyword only when whitespace follows it,
        // so identifiers such as `mutex` are left untouched.
        while let Some(tail) = rest.strip_prefix("mut") {
            if !tail.starts_with(char::is_whitespace) {
                break;
            }
            rest = tail.trim_start();
        }

        // Drop a single lifetime token (`'a`) together with the
        // whitespace after it. A lifetime with nothing after it only
        // loses its apostrophe.
        if let Some(lifetime) = rest.strip_prefix('\'') {
            rest = match lifetime.split_once(char::is_whitespace) {
                Some((_, tail)) => tail.trim_start(),
                None => lifetime,
            };
        }

        if rest == before {
            break;
        }
    }

    let head = rest.split('<').next().unwrap_or(rest).trim();
    head.rsplit("::").next().unwrap_or(head).trim()
}
|
||||
|
||||
/// Tells whether `outer_lower` — an outer wrapper name that the caller
/// has already lowercased — is one of the bare data-only extractors,
/// i.e. wrappers that hand the inner type to the handler without any
/// auth side-effect.
///
/// The comparison is an exact match against the fixed list below; it
/// does not lowercase its input. Matching is done on the outer name
/// only so that a policy-bearing wrapper carrying a data extractor as
/// a generic argument (e.g. `GuardedData<Policy, web::Path<u64>>`) is
/// not suppressed because of the inner `Path<...>`.
fn is_data_only_extractor_outer(outer_lower: &str) -> bool {
    const DATA_ONLY_EXTRACTORS: [&str; 8] = [
        "path",
        "query",
        "json",
        "form",
        "extension",
        "state",
        "data",
        "reqdata",
    ];

    DATA_ONLY_EXTRACTORS.contains(&outer_lower)
}
|
||||
|
||||
fn classify_rocket_guard_type(
|
||||
type_text: &str,
|
||||
binding: &str,
|
||||
|
|
@ -612,6 +710,14 @@ pub(crate) fn inject_guard_checks(
|
|||
for call in guard_calls {
|
||||
let kind = if rules.is_admin_guard(&call.name, &call.args) {
|
||||
AuthCheckKind::AdminGuard
|
||||
} else if rules.is_policy_guard(&call.name) {
|
||||
// Policy/capability-bearing typed extractor (e.g.
|
||||
// meilisearch's `GuardedData<ActionPolicy<X>, _>`).
|
||||
// Recorded as `Other` so the route-level short-circuit in
|
||||
// `auth_check_covers_subject` covers any sink in the
|
||||
// handler — the wrapper proves authorization, not just
|
||||
// authentication.
|
||||
AuthCheckKind::Other
|
||||
} else if rules.is_login_guard(&call.name) {
|
||||
AuthCheckKind::LoginGuard
|
||||
} else {
|
||||
|
|
@ -633,3 +739,153 @@ pub(crate) fn inject_guard_checks(
|
|||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk every `Function`-kind unit in `model` and inject route-level
|
||||
/// guard checks for any parameter whose type is recognised as a
|
||||
/// typed auth/policy extractor (e.g. meilisearch's `GuardedData<P, D>`,
|
||||
/// `axum::extract::State<AuthCtx>`). Complements the route-walk path
|
||||
/// in `collect_routes`: handlers registered by attribute macros
|
||||
/// (`#[routes::path(...)]`, `#[get("/path")]`) or by external
|
||||
/// service-config builders are never matched as route registrations
|
||||
/// here, so their typed-extractor guards would otherwise never be
|
||||
/// injected and `missing_ownership_check` would fire on every
|
||||
/// id-shaped sink they contain.
|
||||
///
|
||||
/// `RouteHandler`-kind units already had their guards injected during
|
||||
/// the route walk and are skipped to avoid duplicate `AuthCheck`
|
||||
/// entries.
|
||||
pub(crate) fn apply_typed_extractor_guards_to_units(
|
||||
root: Node<'_>,
|
||||
bytes: &[u8],
|
||||
rules: &AuthAnalysisRules,
|
||||
model: &mut crate::auth_analysis::model::AuthorizationModel,
|
||||
framework: GuardFramework,
|
||||
) {
|
||||
use crate::auth_analysis::model::AnalysisUnitKind;
|
||||
let function_nodes = collect_function_definition_nodes(root);
|
||||
for unit_idx in 0..model.units.len() {
|
||||
let span = {
|
||||
let unit = &model.units[unit_idx];
|
||||
if unit.kind == AnalysisUnitKind::RouteHandler {
|
||||
continue;
|
||||
}
|
||||
unit.span
|
||||
};
|
||||
let Some(handler_node) = function_nodes
|
||||
.iter()
|
||||
.find(|node| node.start_byte() == span.0 && node.end_byte() == span.1)
|
||||
.copied()
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let guard_calls = guard_calls_for_handler(handler_node, "", bytes, framework);
|
||||
if guard_calls.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let unit = &mut model.units[unit_idx];
|
||||
inject_guard_checks(unit, &guard_calls, rules);
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_function_definition_nodes<'tree>(root: Node<'tree>) -> Vec<Node<'tree>> {
|
||||
let mut out = Vec::new();
|
||||
walk_function_definitions(root, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
fn walk_function_definitions<'tree>(node: Node<'tree>, out: &mut Vec<Node<'tree>>) {
|
||||
// Free / impl / trait fn definitions in tree-sitter-rust.
|
||||
if node.kind() == "function_item" {
|
||||
out.push(node);
|
||||
}
|
||||
for child in named_children(node) {
|
||||
walk_function_definitions(child, out);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Reference sigils, lifetimes, module paths and generic arguments
    /// are all stripped, leaving only the outermost type identifier.
    #[test]
    fn outermost_type_name_strips_refs_and_module_prefix() {
        let cases = [
            ("GuardedData<P, D>", "GuardedData"),
            ("&GuardedData<P, D>", "GuardedData"),
            ("&'a mut GuardedData<P, D>", "GuardedData"),
            ("web::Path<u64>", "Path"),
            ("std::sync::Arc<Mutex<T>>", "Arc"),
            ("", ""),
            ("Bare", "Bare"),
        ];
        for (type_text, expected) in cases {
            assert_eq!(outermost_type_name(type_text), expected);
        }
    }

    /// The real meilisearch shape carries both a policy type and an
    /// inner `Data<>` extractor. It must come back as `Other`
    /// (route-level policy): not `LoginGuard`, which
    /// `has_prior_subject_auth` filters out, and not `None`, which would
    /// happen if the inner `Data<>` triggered the data-only early return.
    #[test]
    fn classify_guard_type_recognises_guarded_data_outer_wrapper() {
        let meilisearch_shape =
            "GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>";
        assert_eq!(
            classify_guard_type(meilisearch_shape),
            Some(AuthCheckKind::Other)
        );
    }

    /// A bare data extractor as the OUTER wrapper is never auth, even
    /// when the inner type name lower-cases to something containing
    /// "auth".
    #[test]
    fn classify_guard_type_data_only_extractor_outer_returns_none() {
        assert_eq!(
            classify_guard_type("Data<AuthController>"),
            None,
            "outer Data<> is a bare data extractor, not auth-bearing"
        );
        for type_text in ["web::Path<UserId>", "Json<CreateUser>", "Form<LoginForm>"] {
            assert_eq!(classify_guard_type(type_text), None);
        }
    }

    /// Previously recognised login/admin guard names keep their
    /// classification.
    #[test]
    fn classify_guard_type_preserves_existing_login_guard_recognition() {
        let cases = [
            ("LocalUserView", AuthCheckKind::LoginGuard),
            ("Authenticated", AuthCheckKind::LoginGuard),
            ("AdminUser", AuthCheckKind::AdminGuard),
            ("CurrentUser", AuthCheckKind::LoginGuard),
        ];
        for (type_text, expected) in cases {
            assert_eq!(classify_guard_type(type_text), Some(expected));
        }
    }

    /// Wrapper names containing both an "admin" and a "guard" token are
    /// classified as admin guards: the admin-priority rule wins inside
    /// the Guarded branch.
    #[test]
    fn classify_guard_type_admin_guarded_takes_admin_priority() {
        for type_text in ["AdminGuard<P, D>", "GuardedAdmin<X>"] {
            assert_eq!(
                classify_guard_type(type_text),
                Some(AuthCheckKind::AdminGuard)
            );
        }
    }

    /// Unrecognised wrappers and the empty string are not guards.
    #[test]
    fn classify_guard_type_unknown_outer_returns_none() {
        for type_text in ["MyCustomWrapper<T>", ""] {
            assert_eq!(classify_guard_type(type_text), None);
        }
    }
}
|
||||
|
|
|
|||
|
|
@ -3455,6 +3455,33 @@ pub fn extract_value_refs(node: Node<'_>, bytes: &[u8]) -> Vec<ValueRef> {
|
|||
index: None,
|
||||
span: span(node),
|
||||
}],
|
||||
// Keyword / named arguments: `Model.objects.filter(organization_id=org.id)`.
|
||||
// Tree-sitter exposes a `name` child (the schema column / parameter
|
||||
// name) and a `value` child (the actual expression). The default
|
||||
// recurse-all-children arm would surface `organization_id` as a
|
||||
// bare-identifier subject, which `is_id_like_name` then flags as
|
||||
// a scoped-identifier user-input. But the kwarg key is the
|
||||
// ORM/RPC schema field name, fixed at call time, never
|
||||
// attacker-controlled. Only the value carries a subject.
|
||||
//
|
||||
// Covers Python `keyword_argument`, JavaScript / TypeScript
|
||||
// `pair` (object property syntax used as kwargs in client libs
|
||||
// like prisma's `where: { id: foo }` is handled separately),
|
||||
// Ruby `pair` (hash kwargs in `Model.where(field: value)`), Go
|
||||
// composite-literal element keys, PHP / C# named arguments.
|
||||
"keyword_argument"
|
||||
| "keyword_arg"
|
||||
| "named_argument"
|
||||
| "named_arg" => {
|
||||
if let Some(value) = node
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| node.child_by_field_name("argument"))
|
||||
{
|
||||
extract_value_refs(value, bytes)
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let mut refs = Vec::new();
|
||||
for idx in 0..node.named_child_count() {
|
||||
|
|
|
|||
|
|
@ -127,6 +127,9 @@ fn parse_flask_route_decorator(
|
|||
};
|
||||
|
||||
let callee = text(function, bytes);
|
||||
if callee_is_test_decorator(&callee) {
|
||||
return None;
|
||||
}
|
||||
let method_name = bare_method_name(&callee);
|
||||
let arguments = decorator_expr.child_by_field_name("arguments")?;
|
||||
let args = named_children(arguments);
|
||||
|
|
@ -173,6 +176,45 @@ fn parse_methods_keyword(arguments: Node<'_>, bytes: &[u8]) -> Option<Vec<HttpMe
|
|||
}
|
||||
}
|
||||
|
||||
/// Reports whether `callee` is, verbatim, one of the known Python
/// test-framework decorator/helper call paths that must never be
/// interpreted as a Flask `<app>.<verb>` route decorator.
///
/// The motivating collision is `unittest.mock.patch`: it takes a string
/// literal as its first positional argument (the import path being
/// patched) and `bare_method_name("mock.patch")` is `patch`, which
/// `parse_flask_route_decorator` used to read as HTTP PATCH. Test
/// methods decorated with `@mock.patch("...")` were therefore attached
/// as route handlers, flipping their `unit.kind` to `RouteHandler` and
/// flooding pytest suites with `missing_ownership_check` findings.
///
/// The denylist is deliberately conservative: only the canonical
/// receiver chains are matched, exactly and case-sensitively. A bare
/// `@patch("x")` (after `from unittest.mock import patch`) is NOT
/// recognised here and still reads as PATCH; that residue is left to the
/// downstream precondition checks.
fn callee_is_test_decorator(callee: &str) -> bool {
    const TEST_DECORATOR_CALLEES: &[&str] = &[
        "mock.patch",
        "mock.patch.object",
        "mock.patch.dict",
        "mock.patch.multiple",
        "unittest.mock.patch",
        "unittest.mock.patch.object",
        "unittest.mock.patch.dict",
        "unittest.mock.patch.multiple",
        "monkeypatch.setattr",
        "monkeypatch.setenv",
        "monkeypatch.delattr",
        "monkeypatch.delenv",
        "pytest.mark.parametrize",
    ];
    TEST_DECORATOR_CALLEES
        .iter()
        .any(|listed| *listed == callee)
}
|
||||
|
||||
fn keyword_argument_string(arguments: Node<'_>, bytes: &[u8], name: &str) -> Option<String> {
|
||||
let value = keyword_argument_value(arguments, bytes, name)?;
|
||||
string_literal_value(value, bytes)
|
||||
|
|
@ -331,6 +373,41 @@ fn inject_middleware_auth(
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test_decorator_tests {
    use super::callee_is_test_decorator;

    /// Test-framework decorators whose bare method name collides with a
    /// Flask HTTP verb (notably `patch`) must be excluded from
    /// `parse_flask_route_decorator`. Without the denylist, every
    /// `@mock.patch("module")` in a pytest file attaches the test method
    /// as a Flask PATCH route handler and floods the auth analysis with
    /// false positives.
    ///
    /// Every denylist entry is exercised so that dropping one by
    /// accident is caught here.
    #[test]
    fn callee_is_test_decorator_recognises_canonical_forms() {
        let denied = [
            // unittest.mock variants, short and fully-qualified.
            "mock.patch",
            "mock.patch.object",
            "mock.patch.dict",
            "mock.patch.multiple",
            "unittest.mock.patch",
            "unittest.mock.patch.object",
            "unittest.mock.patch.dict",
            "unittest.mock.patch.multiple",
            // pytest fixtures / markers.
            "monkeypatch.setattr",
            "monkeypatch.setenv",
            "monkeypatch.delattr",
            "monkeypatch.delenv",
            "pytest.mark.parametrize",
        ];
        for callee in denied {
            assert!(
                callee_is_test_decorator(callee),
                "expected {callee:?} to be treated as a test decorator"
            );
        }
    }

    /// Real Flask-style decorators must not be caught by the denylist,
    /// and matching is exact: a bare `patch` (imported alias) is a
    /// known, documented gap rather than something this predicate
    /// handles.
    #[test]
    fn callee_is_test_decorator_leaves_route_decorators_alone() {
        let allowed = [
            "app.route",
            "app.get",
            "app.post",
            "app.patch",
            "bp.delete",
            "blueprint.put",
            "router.get",
            "patch",
            "",
        ];
        for callee in allowed {
            assert!(
                !callee_is_test_decorator(callee),
                "expected {callee:?} to pass through to route parsing"
            );
        }
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod fastapi_dependencies_tests {
|
||||
use super::is_depends_callee;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use super::config::AuthAnalysisRules;
|
||||
use super::model::AuthorizationModel;
|
||||
use crate::utils::project::FrameworkContext;
|
||||
use crate::utils::project::{FrameworkContext, rust_file_imports_web_framework};
|
||||
use std::path::Path;
|
||||
use tree_sitter::Tree;
|
||||
|
||||
|
|
@ -61,6 +61,18 @@ pub fn extract_authorization_model(
|
|||
}
|
||||
}
|
||||
|
||||
// Per-language web-framework signal used to gate the param-name arm
|
||||
// of `unit_has_user_input_evidence`. Combines the project-root
|
||||
// manifest detection (`framework_ctx`) with a per-file `use`/`import`
|
||||
// check, so a single file in a workspace whose root manifest does
|
||||
// not name a web framework can still opt back in by directly
|
||||
// importing one (e.g. `crates/collab/src/rpc.rs` in zed: workspace
|
||||
// root has no axum, but the file uses `axum::Router`).
|
||||
//
|
||||
// Three-valued: `Some(true)` keeps step 3 firing, `Some(false)`
|
||||
// suppresses it, `None` means no detection ran ─ behavior unchanged.
|
||||
model.lang_web_framework_signal = compute_web_framework_signal(lang, framework_ctx, bytes);
|
||||
|
||||
// **Dedup units by span across extractors.** Multiple extractors
|
||||
// (e.g. Flask + Django on a Python file) each call
|
||||
// `collect_top_level_units`, producing one unit per top-level
|
||||
|
|
@ -80,6 +92,53 @@ pub fn extract_authorization_model(
|
|||
model
|
||||
}
|
||||
|
||||
/// Compute the per-file web-framework signal used to gate the
|
||||
/// param-name arm of `unit_has_user_input_evidence`.
|
||||
///
|
||||
/// Currently emits a non-`None` value only for Rust files. The Rust
|
||||
/// auth analysis is the single biggest source of internal-helper FPs
|
||||
/// in non-web crates (zed's GUI / editor crates); the other languages
|
||||
/// have their own handler-classification policies that already filter
|
||||
/// effectively, so they keep their existing behavior (None →
|
||||
/// fall-through to the param-name heuristic) until each is validated.
|
||||
///
|
||||
/// Three-valued semantics:
|
||||
/// * `Some(true)` ─ project root manifest names a Rust web framework
|
||||
/// (axum / actix_web / rocket), OR the file directly imports one.
|
||||
/// Param-name evidence stays on.
|
||||
/// * `Some(false)` ─ project root manifest was inspected (Cargo.toml
|
||||
/// exists) and named no Rust web framework, AND the file does not
|
||||
/// directly import one. Param-name evidence is suppressed: the
|
||||
/// project has no HTTP boundary in Rust.
|
||||
/// * `None` ─ no detection ran (no `framework_ctx`, no Cargo.toml
|
||||
/// inspected). Behavior unchanged.
|
||||
fn compute_web_framework_signal(
|
||||
lang: &str,
|
||||
framework_ctx: Option<&FrameworkContext>,
|
||||
bytes: &[u8],
|
||||
) -> Option<bool> {
|
||||
if !matches!(lang, "rust" | "rs") {
|
||||
return None;
|
||||
}
|
||||
let project_signal = framework_ctx.and_then(|ctx| ctx.lang_has_web_framework("rust"));
|
||||
if project_signal == Some(true) {
|
||||
return Some(true);
|
||||
}
|
||||
// Project says "no Rust framework" or never inspected. Consult the
|
||||
// file's own imports as a per-file fallback; if the file uses an
|
||||
// axum / actix_web / rocket symbol directly, treat it as a handler
|
||||
// file even when the workspace-root Cargo.toml does not list the
|
||||
// crate. (Real example: zed's `crates/collab/src/rpc.rs` imports
|
||||
// axum but the workspace root Cargo.toml does not.)
|
||||
if rust_file_imports_web_framework(bytes) {
|
||||
return Some(true);
|
||||
}
|
||||
// No file-level evidence either. Only flip to `Some(false)` if a
|
||||
// Cargo.toml manifest was actually inspected — single-file scans
|
||||
// without project context get `None` and preserve prior behavior.
|
||||
project_signal
|
||||
}
|
||||
|
||||
fn deduplicate_units_by_span(model: &mut AuthorizationModel) {
|
||||
use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind};
|
||||
use std::collections::HashMap;
|
||||
|
|
|
|||
|
|
@ -348,6 +348,20 @@ pub struct RouteRegistration {
|
|||
pub struct AuthorizationModel {
    /// Route registrations collected for the file.
    /// NOTE(review): each route's `unit_idx` appears to index into
    /// `units` (it is shifted by the unit offset when models are merged);
    /// confirm against `RouteRegistration`.
    pub routes: Vec<RouteRegistration>,
    /// Analysis units collected for the file.
    pub units: Vec<AnalysisUnit>,
    /// Per-language web-framework presence signal used to gate the
    /// `is_external_input_param_name` arm of `unit_has_user_input_evidence`.
    ///
    /// `None` means detection did not run (single-file unit-test paths,
    /// languages without a framework gate yet). `Some(true)` means the
    /// project manifest or the file's imports name a web framework that
    /// matches this language ─ helper functions are plausibly reachable
    /// from a route handler, so the param-name heuristic stays on.
    /// `Some(false)` means detection ran and named no matching framework
    /// ─ the file lives in a project with no HTTP boundary, so internal
    /// helper params named `*_id` / `req` / `payload` are not user input.
    ///
    /// Currently set only for Rust by `extract_authorization_model`.
    pub lang_web_framework_signal: Option<bool>,
}
|
||||
|
||||
impl AuthorizationModel {
|
||||
|
|
@ -359,5 +373,22 @@ impl AuthorizationModel {
|
|||
route.unit_idx += unit_offset;
|
||||
route
|
||||
}));
|
||||
// Take the strongest signal across extractor outputs: `Some(true)`
|
||||
// wins over `Some(false)` wins over `None`. In practice every
|
||||
// extractor for a given file sees the same `framework_ctx + bytes`
|
||||
// so they all derive identical signals; this is just a defensive
|
||||
// merge.
|
||||
self.lang_web_framework_signal = max_signal(
|
||||
self.lang_web_framework_signal,
|
||||
other.lang_web_framework_signal,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Merges two web-framework signals, keeping the stronger one:
/// `Some(true)` beats `Some(false)`, which beats `None`.
fn max_signal(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    // `Option<bool>` is totally ordered as `None < Some(false) < Some(true)`,
    // which is exactly the strength ranking wanted here.
    a.max(b)
}
|
||||
|
|
|
|||
|
|
@ -345,6 +345,126 @@ pub(super) fn has_keyword_arg(call_node: Node, keyword_name: &str, code: &[u8])
|
|||
false
|
||||
}
|
||||
|
||||
/// Extract the literal value of a property `prop_name` from the object
|
||||
/// literal at positional argument `arg_index`. Returns `None` if the
|
||||
/// arg is absent, is not an object literal, the prop key isn't found,
|
||||
/// or the prop value isn't a literal (so callers can distinguish
|
||||
/// "present but dynamic" from "absent" only via [`has_object_arg_property`]).
|
||||
///
|
||||
/// Used by JS/TS-style "options object as kwargs" gates — e.g.
|
||||
/// `_.template(tpl, { evaluate: false })` — where the safe-flag lives
|
||||
/// in an inline object literal rather than as a dedicated kwarg node
|
||||
/// (which JS does not have). Strict-additive: returns `None` for any
|
||||
/// non-JS-object shape, including bare identifiers passed as the
|
||||
/// options arg, so the gate falls back to the conservative dynamic
|
||||
/// branch.
|
||||
pub(super) fn extract_object_arg_property(
|
||||
call_node: Node,
|
||||
arg_index: usize,
|
||||
prop_name: &str,
|
||||
code: &[u8],
|
||||
) -> Option<String> {
|
||||
let args = call_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let arg = args.named_children(&mut cursor).nth(arg_index)?;
|
||||
let arg = unwrap_parens(arg);
|
||||
if !matches!(arg.kind(), "object" | "dictionary") {
|
||||
return None;
|
||||
}
|
||||
let mut c = arg.walk();
|
||||
for child in arg.named_children(&mut c) {
|
||||
if child.kind() != "pair" {
|
||||
continue;
|
||||
}
|
||||
let Some(key_node) = child.child_by_field_name("key") else {
|
||||
continue;
|
||||
};
|
||||
let key_text = match key_node.kind() {
|
||||
"string" | "string_literal" => text_of(key_node, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
}),
|
||||
"computed_property_name" => continue,
|
||||
_ => text_of(key_node, code),
|
||||
};
|
||||
if key_text.as_deref() != Some(prop_name) {
|
||||
continue;
|
||||
}
|
||||
let val_node = child.child_by_field_name("value")?;
|
||||
let val_node = unwrap_parens(val_node);
|
||||
return match val_node.kind() {
|
||||
"true" | "false" | "null" | "undefined" | "number" | "string" | "string_literal" => {
|
||||
text_of(val_node, code).map(|s| s.to_string())
|
||||
}
|
||||
// JS booleans true/false are their own node kinds (above), but
|
||||
// some grammar versions wrap them as identifier literals; surface
|
||||
// `undefined` similarly.
|
||||
"identifier" => text_of(val_node, code)
|
||||
.filter(|s| matches!(s.as_str(), "true" | "false" | "null" | "undefined")),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Return `true` if the call node's positional arg at `arg_index` is an
|
||||
/// object literal containing a property named `prop_name` (whether the
|
||||
/// value is a literal or a dynamic expression). Used alongside
|
||||
/// [`extract_object_arg_property`] so gated-sink classification can
|
||||
/// distinguish "options key absent" (language default) from "options
|
||||
/// key present with dynamic value" (conservative dangerous).
|
||||
pub(super) fn has_object_arg_property(
|
||||
call_node: Node,
|
||||
arg_index: usize,
|
||||
prop_name: &str,
|
||||
code: &[u8],
|
||||
) -> bool {
|
||||
let Some(args) = call_node.child_by_field_name("arguments") else {
|
||||
return false;
|
||||
};
|
||||
let mut cursor = args.walk();
|
||||
let Some(arg) = args.named_children(&mut cursor).nth(arg_index) else {
|
||||
return false;
|
||||
};
|
||||
let arg = unwrap_parens(arg);
|
||||
if !matches!(arg.kind(), "object" | "dictionary") {
|
||||
return false;
|
||||
}
|
||||
let mut c = arg.walk();
|
||||
for child in arg.named_children(&mut c) {
|
||||
match child.kind() {
|
||||
"shorthand_property_identifier" | "shorthand_property_identifier_pattern"
|
||||
if text_of(child, code).as_deref() == Some(prop_name) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
"pair" => {
|
||||
if let Some(key_node) = child.child_by_field_name("key") {
|
||||
let key_text = match key_node.kind() {
|
||||
"string" | "string_literal" => text_of(key_node, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
}),
|
||||
"computed_property_name" => continue,
|
||||
_ => text_of(key_node, code),
|
||||
};
|
||||
if key_text.as_deref() == Some(prop_name) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Inspect the first positional argument of a call node and return its
|
||||
/// tree-sitter `kind()` plus a flag indicating whether any descendant is an
|
||||
/// `interpolation` node. Skips parenthesisation (`(arg0)` is treated as
|
||||
|
|
@ -584,6 +704,29 @@ pub(super) fn find_chained_inner_call<'a>(
|
|||
let function = outer
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| outer.child_by_field_name("method"))?;
|
||||
// Direct double-call form (`f()(x)`): the outer call's `function`
|
||||
// field IS itself a call_expression, with no intermediate
|
||||
// member-chain. Treat the inner call as the chain's innermost.
|
||||
// Without this, lodash-style template-render chains like
|
||||
// `_.template(t)(data)` evade the chained-inner rebinding because
|
||||
// the outer's function field is a `call_expression`, not the
|
||||
// `member_expression` shape the original branch below expects.
|
||||
if matches!(
|
||||
lookup(lang, function.kind()),
|
||||
Kind::CallFn | Kind::CallMethod
|
||||
) {
|
||||
// Recurse: the inner call may itself be chained.
|
||||
if let Some(inner) = find_chained_inner_call(function, lang, code) {
|
||||
return Some(inner);
|
||||
}
|
||||
let inner_func = function
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| function.child_by_field_name("method"))
|
||||
.or_else(|| function.child_by_field_name("name"))?;
|
||||
let raw = text_of(inner_func, code)?;
|
||||
let inner_text: String = raw.chars().filter(|c| !c.is_whitespace()).collect();
|
||||
return Some((function, inner_text));
|
||||
}
|
||||
// The function/method field for a chained call is a member_expression
|
||||
// (JS/TS) or attribute (Python) etc.; its `object` field is the
|
||||
// receiver expression. Only proceed when that receiver is itself a
|
||||
|
|
|
|||
|
|
@ -54,8 +54,9 @@ use literals::{
|
|||
detect_rust_replace_chain_sanitizer, extract_arg_callees, extract_arg_string_literals,
|
||||
extract_arg_uses, extract_const_keyword_arg, extract_const_macro_arg, extract_const_string_arg,
|
||||
extract_destination_field_pairs, extract_destination_kwarg_pairs, extract_kwargs,
|
||||
extract_literal_rhs, extract_shell_array_payload_idents, find_call_node, find_call_node_deep,
|
||||
find_chained_inner_call, has_keyword_arg, has_only_literal_args, is_parameterized_query_call,
|
||||
extract_literal_rhs, extract_object_arg_property, extract_shell_array_payload_idents,
|
||||
find_call_node, find_call_node_deep, find_chained_inner_call, has_keyword_arg,
|
||||
has_object_arg_property, has_only_literal_args, is_parameterized_query_call,
|
||||
java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
js_chain_outer_method_for_inner, ruby_chain_arg0_for_method, walk_chain_inner_call_args,
|
||||
};
|
||||
|
|
@ -67,11 +68,33 @@ use params::{
|
|||
/// Test-only re-export of [`extract_param_meta`] so the external
|
||||
/// `tests/typed_extractors_audit.rs` harness can drive the per-param
|
||||
/// classifier directly without spinning up the full scan pipeline.
|
||||
/// Projects away the destructured-siblings third tuple slot so the
|
||||
/// existing tuple-shape assertions in the audit harness keep working;
|
||||
/// the sibling info is plumbed separately through `BodyMeta`.
|
||||
pub fn extract_param_meta_for_test<'a>(
|
||||
func_node: tree_sitter::Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<(String, Option<crate::ssa::type_facts::TypeKind>)> {
|
||||
extract_param_meta(func_node, lang, code)
|
||||
.into_iter()
|
||||
.map(|(name, ty, _siblings)| (name, ty))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Test-only re-export that returns the full per-slot tuple including
|
||||
/// destructured sibling names. Used by the destructured-arg-probe
|
||||
/// regression tests in `src/taint/tests.rs` and the params unit tests
|
||||
/// in `src/cfg/cfg_tests.rs`.
|
||||
pub fn extract_param_meta_with_destructured_for_test<'a>(
|
||||
func_node: tree_sitter::Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<(
|
||||
String,
|
||||
Option<crate::ssa::type_facts::TypeKind>,
|
||||
Vec<String>,
|
||||
)> {
|
||||
extract_param_meta(func_node, lang, code)
|
||||
}
|
||||
|
||||
|
|
@ -567,6 +590,17 @@ pub struct BodyMeta {
|
|||
/// `None`, downstream behaviour is identical to the pre-Phase-1
|
||||
/// engine.
|
||||
pub param_types: Vec<Option<crate::ssa::type_facts::TypeKind>>,
|
||||
/// Per-parameter destructured-binding sibling names. Same length
|
||||
/// as `params`; entry `i` lists field names bound by the same
|
||||
/// argument slot as `params[i]`, excluding the primary name itself.
|
||||
/// Empty for non-destructured params. Today populated only for
|
||||
/// JS/TS object-pattern formals (`({ a, b, c })` → params=["a"],
|
||||
/// destructured=[["b","c"]]). Used by per-parameter taint-summary
|
||||
/// probing in `extract_ssa_func_summary` so destructured bindings
|
||||
/// inside the body share the slot's seeded caps and any of them
|
||||
/// being in `validated_must` at a return path counts as the slot
|
||||
/// being validated. Closes the residual gap behind CVE-2026-25544.
|
||||
pub param_destructured_fields: Vec<Vec<String>>,
|
||||
pub param_count: usize,
|
||||
pub span: (usize, usize),
|
||||
pub parent_body_id: Option<BodyId>,
|
||||
|
|
@ -1909,8 +1943,27 @@ pub(super) fn push_node<'a>(
|
|||
}
|
||||
})
|
||||
},
|
||||
|kw| extract_const_keyword_arg(cn, kw, code),
|
||||
|kw| has_keyword_arg(cn, kw, code),
|
||||
|kw| {
|
||||
// For JS/TS, options-bearing args are passed as inline
|
||||
// object literals (`fn(x, { evaluate: false })`) rather
|
||||
// than language-level keyword arguments. When the
|
||||
// standard `keyword_argument`-walking extractor returns
|
||||
// None, fall back to inspecting arg 1's object literal
|
||||
// for a property named `kw`. This lets gates like
|
||||
// `_.template` consult `{ evaluate: false }` literally.
|
||||
extract_const_keyword_arg(cn, kw, code).or_else(|| {
|
||||
if matches!(lang, "javascript" | "typescript") {
|
||||
extract_object_arg_property(cn, 1, kw, code)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
},
|
||||
|kw| {
|
||||
has_keyword_arg(cn, kw, code)
|
||||
|| (matches!(lang, "javascript" | "typescript")
|
||||
&& has_object_arg_property(cn, 1, kw, code))
|
||||
},
|
||||
);
|
||||
|
||||
if !matches.is_empty() {
|
||||
|
|
@ -3871,9 +3924,13 @@ pub(super) fn build_sub<'a>(
|
|||
let is_anon = is_anon_fn_name(&fn_name);
|
||||
let param_meta = extract_param_meta(ast, lang, code);
|
||||
let param_count = param_meta.len();
|
||||
let param_names: Vec<String> = param_meta.iter().map(|(n, _)| n.clone()).collect();
|
||||
let param_names: Vec<String> = param_meta.iter().map(|(n, _, _)| n.clone()).collect();
|
||||
let param_types: Vec<Option<crate::ssa::type_facts::TypeKind>> =
|
||||
param_meta.iter().map(|(_, t)| t.clone()).collect();
|
||||
param_meta.iter().map(|(_, t, _)| t.clone()).collect();
|
||||
let param_destructured_fields: Vec<Vec<String>> = param_meta
|
||||
.iter()
|
||||
.map(|(_, _, siblings)| siblings.clone())
|
||||
.collect();
|
||||
|
||||
// ── 1b) Compute identity discriminators ───────────────────────────
|
||||
let (fn_container, fn_kind) =
|
||||
|
|
@ -4130,6 +4187,7 @@ pub(super) fn build_sub<'a>(
|
|||
name: if is_anon { None } else { Some(fn_name.clone()) },
|
||||
params: param_names,
|
||||
param_types,
|
||||
param_destructured_fields,
|
||||
param_count,
|
||||
span: (ast.start_byte(), ast.end_byte()),
|
||||
parent_body_id: Some(current_body_id),
|
||||
|
|
@ -4628,6 +4686,7 @@ pub(crate) fn build_cfg<'a>(
|
|||
name: None,
|
||||
params: Vec::new(),
|
||||
param_types: Vec::new(),
|
||||
param_destructured_fields: Vec::new(),
|
||||
param_count: 0,
|
||||
span: (0, code.len()),
|
||||
parent_body_id: None,
|
||||
|
|
|
|||
|
|
@ -21,16 +21,27 @@ fn lookup_dto_class(class_name: &str) -> Option<TypeKind> {
|
|||
/// Extract parameter names + per-position [`TypeKind`] from a function
|
||||
/// AST node. Each entry's second slot is `Some(TypeKind)` when the
|
||||
/// parameter's decorator, attribute, or static type annotation maps to
|
||||
/// a known kind, and `None` otherwise. Strictly additive, when no
|
||||
/// type info is recoverable, behaviour is identical to the names-only
|
||||
/// path.
|
||||
/// a known kind, and `None` otherwise. The third slot lists
|
||||
/// destructured field names bound by the same parameter slot — empty
|
||||
/// for non-destructured params and for the primary name itself. E.g.
|
||||
/// for the JS/TS object-pattern formal `({ a, b, c })`, the entry is
|
||||
/// `("a", None, ["b", "c"])`. Strictly additive: when the param is
|
||||
/// not a destructured pattern (or the language has no destructure
|
||||
/// concept), behaviour is identical to the pre-Phase-5 names-only path.
|
||||
///
|
||||
/// Closes the residual gap behind CVE-2026-25544 (PayloadCMS Drizzle
|
||||
/// SQL injection): a per-parameter taint probe that seeds only the
|
||||
/// primary name `column` cannot see flow through sibling destructured
|
||||
/// bindings (`value` etc.) inside the body, so summary extraction
|
||||
/// misses `validated_params_to_return` when a validator helper is
|
||||
/// applied to one of the siblings.
|
||||
pub(super) fn extract_param_meta<'a>(
|
||||
func_node: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<(String, Option<TypeKind>)> {
|
||||
) -> Vec<(String, Option<TypeKind>, Vec<String>)> {
|
||||
let cfg = param_config(lang);
|
||||
let mut out: Vec<(String, Option<TypeKind>)> = Vec::new();
|
||||
let mut out: Vec<(String, Option<TypeKind>, Vec<String>)> = Vec::new();
|
||||
// Try the params_field directly on the function node first.
|
||||
// For C/C++, the parameter list is nested inside the declarator
|
||||
// (function_definition > declarator:function_declarator > parameters:parameter_list),
|
||||
|
|
@ -51,7 +62,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
if let Some(p) = func_node.child_by_field_name("parameter") {
|
||||
if p.kind() == "identifier" {
|
||||
if let Some(name) = text_of(p, code) {
|
||||
out.push((name, None));
|
||||
out.push((name, None, Vec::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -62,7 +73,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
for child in params.children(&mut cursor) {
|
||||
// Self/this parameter (e.g. Rust's `self_parameter`)
|
||||
if cfg.self_param_kinds.contains(&child.kind()) {
|
||||
out.push(("self".into(), None));
|
||||
out.push(("self".into(), None, Vec::new()));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -74,14 +85,26 @@ pub(super) fn extract_param_meta<'a>(
|
|||
if let Some(node) = child.child_by_field_name(field) {
|
||||
let mut tmp = Vec::new();
|
||||
collect_idents(node, code, &mut tmp);
|
||||
let candidate = if lang == "rust" {
|
||||
tmp.into_iter().last()
|
||||
let primary = if lang == "rust" {
|
||||
// Rust: last ident is the binding name (e.g.
|
||||
// `Path(project_id): Path<i64>` → `project_id`).
|
||||
tmp.pop()
|
||||
} else if tmp.is_empty() {
|
||||
None
|
||||
} else {
|
||||
tmp.into_iter().next()
|
||||
Some(tmp.remove(0))
|
||||
};
|
||||
if let Some(name) = candidate {
|
||||
if let Some(name) = primary {
|
||||
let ty = classify_param_type(child, lang, code);
|
||||
out.push((name, ty));
|
||||
// Surface destructured siblings only when the
|
||||
// pattern node is a destructure container. For
|
||||
// ordinary (non-destructured) params, `tmp` is
|
||||
// already empty after `pop()` / `remove(0)`.
|
||||
// Object-pattern children of the same slot
|
||||
// (`{ a, b, c }`) leave the remaining names in
|
||||
// `tmp`, which become the slot's siblings.
|
||||
let siblings = sibling_names_for_destructure(node, &tmp, lang);
|
||||
out.push((name, ty, siblings));
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -92,7 +115,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
&& child.kind() == "identifier"
|
||||
&& let Some(txt) = text_of(child, code)
|
||||
{
|
||||
out.push((txt, None));
|
||||
out.push((txt, None, Vec::new()));
|
||||
found = true;
|
||||
}
|
||||
// Fallback for C/C++: look for nested declarator → identifier
|
||||
|
|
@ -101,7 +124,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
collect_idents(child, code, &mut tmp);
|
||||
if let Some(last) = tmp.pop() {
|
||||
let ty = classify_param_type(child, lang, code);
|
||||
out.push((last, ty));
|
||||
out.push((last, ty, Vec::new()));
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -112,12 +135,22 @@ pub(super) fn extract_param_meta<'a>(
|
|||
// *first* identifier, that is the parameter name; subsequent
|
||||
// identifiers are part of the type annotation or default
|
||||
// expression.
|
||||
//
|
||||
// Destructure-container case (JS arrow `({ a, b }) => …`):
|
||||
// when the child node IS a destructure pattern itself (no
|
||||
// `required_parameter` / `assignment_pattern` wrapper), the
|
||||
// remaining idents after the primary are destructured
|
||||
// bindings sharing this slot — surface them as siblings so
|
||||
// per-parameter summary probing seeds every binding the
|
||||
// slot produces.
|
||||
if !found {
|
||||
let mut tmp = Vec::new();
|
||||
collect_idents(child, code, &mut tmp);
|
||||
if let Some(first) = tmp.into_iter().next() {
|
||||
if !tmp.is_empty() {
|
||||
let first = tmp.remove(0);
|
||||
let ty = classify_param_type(child, lang, code);
|
||||
out.push((first, ty));
|
||||
let siblings = sibling_names_for_destructure(child, &tmp, lang);
|
||||
out.push((first, ty, siblings));
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
|
@ -127,13 +160,52 @@ pub(super) fn extract_param_meta<'a>(
|
|||
// where the child is an `identifier` node, not a `parameter` wrapper.
|
||||
if child.kind() == "identifier" {
|
||||
if let Some(txt) = text_of(child, code) {
|
||||
out.push((txt, None));
|
||||
out.push((txt, None, Vec::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Return destructured field-name siblings for a parameter's pattern
|
||||
/// node, but only when the pattern is a recognised destructure
|
||||
/// container (object / record pattern). For ordinary patterns the
|
||||
/// `remaining` slice is already empty so this is a noop. Restricting
|
||||
/// the return to destructure containers prevents typed-parameter
|
||||
/// idioms (`Path<i64>`, `@PathVariable Long userId`, Rust extractor
|
||||
/// wrappers) from accidentally surfacing the type identifier as a
|
||||
/// destructured sibling.
|
||||
fn sibling_names_for_destructure(
|
||||
pattern: Node<'_>,
|
||||
remaining: &[String],
|
||||
lang: &str,
|
||||
) -> Vec<String> {
|
||||
if remaining.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
if !is_destructure_container_kind(pattern.kind(), lang) {
|
||||
return Vec::new();
|
||||
}
|
||||
remaining.to_vec()
|
||||
}
|
||||
|
||||
/// Report whether `kind` is a tree-sitter pattern node kind that
/// destructures a single argument into multiple named bindings in `lang`.
///
/// Only the JavaScript / TypeScript `object_pattern` is recognised.
/// Every other `(lang, kind)` pair returns `false` — including Python's
/// `pattern_list` / `tuple_pattern`, which are NOT handled here — so the
/// single-name fallback path stays untouched for them. Conservative by
/// design: a kind should only be added once there is explicit
/// per-language reasoning for it.
///
/// Array patterns (`[a, b]`) are intentionally omitted: the index-based
/// unpacking is positional, and the names don't map cleanly to
/// "all share slot 0".
fn is_destructure_container_kind(kind: &str, lang: &str) -> bool {
    matches!(
        (lang, kind),
        ("javascript" | "typescript", "object_pattern")
    )
}
|
||||
|
||||
/// Walk up from a function definition node and build a container path.
|
||||
///
|
||||
/// Records the names of enclosing classes / impls / modules / namespaces /
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use super::rules;
|
|||
use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
|
||||
use crate::cfg::{EdgeKind, StmtKind};
|
||||
use crate::patterns::Severity;
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use petgraph::visit::EdgeRef;
|
||||
use std::collections::HashSet;
|
||||
|
|
@ -423,6 +424,23 @@ impl CfgAnalysis for ResourceMisuse {
|
|||
if ctx.cfg[acquire].managed_resource {
|
||||
continue;
|
||||
}
|
||||
// SAFE-FOR-FIELD-LHS (Go only): skip member-expression
|
||||
// LHS acquires. `b.cpuprof = os.Create(...)` transfers
|
||||
// ownership to the containing struct; closure
|
||||
// responsibility belongs to a paired Stop()/Release()
|
||||
// method on the struct's lifecycle. Mirrors the gate
|
||||
// in src/state/transfer.rs::apply_call. Production
|
||||
// trigger: prometheus
|
||||
// cmd/promtool/tsdb.go::startProfiling cluster.
|
||||
// Restricted to Go because TS/JS class-field acquires
|
||||
// (`this.fd = fs.openSync(...)`) are still expected to
|
||||
// be tracked — the leak fixtures rely on it.
|
||||
if ctx.lang == Lang::Go
|
||||
&& let Some(acquired_var) = ctx.cfg[acquire].taint.defines.as_deref()
|
||||
&& acquired_var.contains('.')
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Suppress resources with a deferred release (Go `defer f.Close()`).
|
||||
// Defer guarantees cleanup on all exit paths including early returns.
|
||||
if let Some(acquired_var) = ctx.cfg[acquire].taint.defines.as_deref() {
|
||||
|
|
|
|||
|
|
@ -2516,6 +2516,7 @@ fn ssa_summaries_round_trip() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
),
|
||||
|
|
@ -2551,6 +2552,7 @@ fn ssa_summaries_round_trip() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
),
|
||||
|
|
@ -2724,6 +2726,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)];
|
||||
|
|
@ -2761,6 +2764,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)];
|
||||
|
|
@ -2819,6 +2823,7 @@ fn clear_drops_ssa_summaries_table() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)];
|
||||
|
|
@ -3092,6 +3097,7 @@ fn make_test_ssa_summary() -> crate::summary::ssa_summary::SsaFuncSummary {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -134,6 +134,9 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// (Lodash `_.template` is modeled as a gated sink in `GATED_SINKS`
|
||||
// below — the gate inspects arg 1's options object so the patched
|
||||
// `{ evaluate: false }` form is suppressed.)
|
||||
LabelRule {
|
||||
matchers: &["innerHTML", "dangerouslySetInnerHTML"],
|
||||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
|
|
@ -377,6 +380,46 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// Lodash `_.template(template, options?)` — server-side template
|
||||
// injection sink. Lodash's template parser by default compiles
|
||||
// `<% ... %>` evaluate blocks into a JavaScript Function via the
|
||||
// `Function` constructor; when the template string is attacker-
|
||||
// controlled this is RCE (Strapi CVE-2023-22621 et al.).
|
||||
//
|
||||
// Gate: activate on arg 0 (the template string). Inspect arg 1's
|
||||
// options object for `evaluate: false`; when present as a literal
|
||||
// the evaluate-block compiler is disabled and the call is safe.
|
||||
// Missing arg 1, missing `evaluate` key, or a dynamic value all
|
||||
// fall through `ValueMatch`'s `None` branch and fire conservatively.
|
||||
//
|
||||
// The `keyword_name`-based activation reads the property value via
|
||||
// the JS-side closure augmentation in `cfg/mod.rs`, which falls
|
||||
// back to walking the call's arg-1 object literal when the
|
||||
// language-default `keyword_argument` extraction yields nothing.
|
||||
SinkGate {
|
||||
callee_matcher: "_.template",
|
||||
arg_index: 0,
|
||||
dangerous_values: &["true"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: Some("evaluate"),
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "lodash.template",
|
||||
arg_index: 0,
|
||||
dangerous_values: &["true"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: Some("evaluate"),
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// ── Outbound HTTP clients (SSRF) ──────────────────────────────────────
|
||||
//
|
||||
// Policy: SSRF fires only when taint reaches the destination-bearing
|
||||
|
|
@ -810,7 +853,21 @@ pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
|||
|
||||
pub static PARAM_CONFIG: ParamConfig = ParamConfig {
|
||||
params_field: "parameters",
|
||||
param_node_kinds: &["identifier"],
|
||||
// `identifier` covers bare params (`a`); `assignment_pattern` covers
|
||||
// default-value params (`a = {}`). Without `assignment_pattern`,
|
||||
// tree-sitter wraps the identifier in a node the param walker
|
||||
// doesn't recognize, and `extract_param_meta` produces a
|
||||
// parameter-less summary for any function whose params have
|
||||
// defaults — breaking cross-function `param_to_sink` propagation
|
||||
// for shapes like `(emailOptions = {}, emailTemplate = {}, data = {}) => …`.
|
||||
// `object_pattern` covers destructured object formals (`({ a, b })`),
|
||||
// which tree-sitter-javascript exposes as a direct child of
|
||||
// `formal_parameters` (no `required_parameter` wrapper as in TS).
|
||||
// Without it the per-parameter probe never seeds the destructured
|
||||
// bindings and summary extraction misses `validated_params_to_return`
|
||||
// for shapes like `({ value }) => { validate(value); ... }` —
|
||||
// residual gap behind CVE-2026-25544.
|
||||
param_node_kinds: &["identifier", "assignment_pattern", "object_pattern"],
|
||||
self_param_kinds: &[],
|
||||
ident_fields: &["name", "pattern"],
|
||||
};
|
||||
|
|
|
|||
|
|
@ -2166,6 +2166,7 @@ mod tests {
|
|||
|
||||
let ctx = FrameworkContext {
|
||||
frameworks: vec![DetectedFramework::Echo],
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
let rules = go::framework_rules(&ctx);
|
||||
let extras = rules.to_vec();
|
||||
|
|
@ -2194,6 +2195,7 @@ mod tests {
|
|||
|
||||
let ctx = FrameworkContext {
|
||||
frameworks: vec![DetectedFramework::Koa],
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
let extras = javascript::framework_rules(&ctx);
|
||||
|
||||
|
|
@ -2224,6 +2226,7 @@ mod tests {
|
|||
|
||||
let ctx = FrameworkContext {
|
||||
frameworks: vec![DetectedFramework::Fastify],
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
let extras = typescript::framework_rules(&ctx);
|
||||
|
||||
|
|
@ -2250,6 +2253,7 @@ mod tests {
|
|||
|
||||
let ctx = FrameworkContext {
|
||||
frameworks: vec![DetectedFramework::Sinatra],
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
let rules = ruby::framework_rules(&ctx);
|
||||
let extras = rules.to_vec();
|
||||
|
|
@ -2274,6 +2278,7 @@ mod tests {
|
|||
|
||||
let ctx = FrameworkContext {
|
||||
frameworks: vec![DetectedFramework::Axum],
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
let extras = rust::framework_rules(&ctx);
|
||||
|
||||
|
|
@ -2304,6 +2309,7 @@ mod tests {
|
|||
|
||||
let ctx = FrameworkContext {
|
||||
frameworks: vec![DetectedFramework::ActixWeb],
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
let extras = rust::framework_rules(&ctx);
|
||||
|
||||
|
|
@ -2327,6 +2333,7 @@ mod tests {
|
|||
|
||||
let ctx = FrameworkContext {
|
||||
frameworks: vec![DetectedFramework::Rocket],
|
||||
inspected_langs: std::collections::HashSet::new(),
|
||||
};
|
||||
let extras = rust::framework_rules(&ctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -1873,6 +1873,7 @@ function consume() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
|
|||
|
|
@ -445,6 +445,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)],
|
||||
|
|
@ -663,6 +664,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)],
|
||||
|
|
|
|||
|
|
@ -314,8 +314,31 @@ impl DefaultTransfer<'_> {
|
|||
}
|
||||
|
||||
// ── Resource acquire ─────────────────────────────────────────────
|
||||
// SAFE-FOR-FIELD-LHS (Go only): skip member-expression LHS
|
||||
// acquires. A `b.cpuprof = os.Create(...)` pattern transfers
|
||||
// ownership to the containing struct; the local function body
|
||||
// cannot observe the closure (which lives in a paired
|
||||
// Stop()/dispose() method), so tracking `b.cpuprof` as a local
|
||||
// resource is a guaranteed FP at function exit. Mirrors the
|
||||
// gate in src/cfg_analysis/resources.rs::run. Production
|
||||
// trigger: prometheus cmd/promtool/tsdb.go::startProfiling
|
||||
// cluster (b.cpuprof, b.memprof, b.blockprof, b.mtxprof).
|
||||
// Restricted to Go because TS/JS class-field acquires
|
||||
// (`this.fd = fs.openSync(...)`) are still expected to be
|
||||
// tracked — the leak fixtures rely on it.
|
||||
let mut direct_acquire = false;
|
||||
for pair in self.resource_pairs {
|
||||
let define_is_field_lhs = self.lang == Lang::Go
|
||||
&& info
|
||||
.taint
|
||||
.defines
|
||||
.as_deref()
|
||||
.is_some_and(|d| d.contains('.'));
|
||||
let resource_pairs_iter: &[ResourcePair] = if define_is_field_lhs {
|
||||
&[]
|
||||
} else {
|
||||
self.resource_pairs
|
||||
};
|
||||
for pair in resource_pairs_iter {
|
||||
let is_acquire = pair.acquire.iter().any(|a| callee_matches(&callee, a));
|
||||
let is_excluded = pair
|
||||
.exclude_acquire
|
||||
|
|
@ -369,6 +392,50 @@ impl DefaultTransfer<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
// INNER-CALL-RELEASE-IN-ARG: walk info.arg_callees so a release
|
||||
// method that lives in argument position is still observed.
|
||||
// Production triggers: `require.NoError(t, f.Close())` (Go
|
||||
// testify), `errs = append(errs, f.Close())`, JUnit
|
||||
// `assertEquals(0, in.read())`. Conservative: bare-receiver
|
||||
// inner calls only (recv has no dot — chained-receiver
|
||||
// releases are owned by chain_proxies which doesn't observe
|
||||
// inner-call positions today); marks CLOSED only (no
|
||||
// DoubleClose since attribution is approximate); respects
|
||||
// in_defer for symmetry with the direct-release branch above.
|
||||
if !info.in_defer && !info.arg_callees.is_empty() {
|
||||
for arg_callee in &info.arg_callees {
|
||||
let Some(arg_callee_text) = arg_callee.as_deref() else {
|
||||
continue;
|
||||
};
|
||||
let Some((recv_text, _method)) = try_chain_decompose(arg_callee_text) else {
|
||||
continue;
|
||||
};
|
||||
if recv_text.contains('.') {
|
||||
continue;
|
||||
}
|
||||
let arg_callee_lower = arg_callee_text.to_ascii_lowercase();
|
||||
let matches_release = self.resource_pairs.iter().any(|p| {
|
||||
p.release
|
||||
.iter()
|
||||
.any(|r| callee_matches(&arg_callee_lower, r))
|
||||
});
|
||||
if !matches_release {
|
||||
continue;
|
||||
}
|
||||
let Some(sym) = self.get_sym(info, recv_text) else {
|
||||
continue;
|
||||
};
|
||||
if released.contains(&sym) {
|
||||
continue;
|
||||
}
|
||||
let current = state.resource.get(sym);
|
||||
if current.contains(ResourceLifecycle::OPEN) {
|
||||
state.resource.set(sym, ResourceLifecycle::CLOSED);
|
||||
released.push(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Resource method proxy ────────────────────────────────────────
|
||||
// When no direct resource pair matched, check if the callee is a
|
||||
// method wrapper for a known resource operation.
|
||||
|
|
@ -1985,4 +2052,187 @@ mod tests {
|
|||
assert_eq!(state.receiver_class_group.get(&sym_f), Some(&class_group));
|
||||
assert!(state.chain_proxies.is_empty());
|
||||
}
|
||||
|
||||
/// A release call in argument position (`require.NoError(t, f.Close())`)
/// must move the bare receiver `f` from OPEN to CLOSED without emitting
/// any events.
#[test]
fn inner_call_release_in_arg_marks_closed() {
    let mut interner = SymbolInterner::new();
    let file_sym = interner.intern_scoped(None, "f");

    // `f` starts out as an open resource.
    let mut before = ProductState::initial();
    before.resource.set(file_sym, ResourceLifecycle::OPEN);

    let ast = AstMeta {
        span: (0, 30),
        ..Default::default()
    };
    let taint = TaintMeta {
        uses: vec!["t".into(), "f".into()],
        ..Default::default()
    };
    let call = CallMeta {
        callee: Some("require.NoError".into()),
        ..Default::default()
    };
    let node = NodeInfo {
        kind: StmtKind::Call,
        ast,
        taint,
        call,
        // Second argument is the inner `f.Close()` call.
        arg_callees: vec![None, Some("f.Close".into())],
        ..Default::default()
    };

    let xfer = DefaultTransfer {
        lang: Lang::Go,
        resource_pairs: rules::resource_pairs(Lang::Go),
        interner: &interner,
        resource_method_summaries: &[],
        ptr_proxy_hints: None,
    };

    let (after, events) = xfer.apply(NodeIndex::new(0), &node, None, before);
    assert!(events.is_empty());
    assert_eq!(after.resource.get(file_sym), ResourceLifecycle::CLOSED);
}
|
||||
|
||||
/// An inner call whose receiver is itself a member chain (`c.mu.Unlock`)
/// must leave the tracked symbol `c` untouched: it stays OPEN.
#[test]
fn inner_call_release_in_arg_chained_receiver_skipped() {
    let mut interner = SymbolInterner::new();
    let conn_sym = interner.intern_scoped(None, "c");

    let mut before = ProductState::initial();
    before.resource.set(conn_sym, ResourceLifecycle::OPEN);

    let ast = AstMeta {
        span: (0, 30),
        ..Default::default()
    };
    let taint = TaintMeta {
        uses: vec!["c".into()],
        ..Default::default()
    };
    let call = CallMeta {
        callee: Some("t.Helper".into()),
        ..Default::default()
    };
    let node = NodeInfo {
        kind: StmtKind::Call,
        ast,
        taint,
        call,
        // Receiver text `c.mu` contains a dot, i.e. a chained receiver.
        arg_callees: vec![Some("c.mu.Unlock".into())],
        ..Default::default()
    };

    let xfer = DefaultTransfer {
        lang: Lang::Go,
        resource_pairs: rules::resource_pairs(Lang::Go),
        interner: &interner,
        resource_method_summaries: &[],
        ptr_proxy_hints: None,
    };

    let (after, _) = xfer.apply(NodeIndex::new(0), &node, None, before);
    assert_eq!(after.resource.get(conn_sym), ResourceLifecycle::OPEN);
}
|
||||
|
||||
/// When the enclosing node is marked `in_defer`, an inner `f.Close()` in
/// argument position must not close `f`: the resource stays OPEN.
#[test]
fn inner_call_release_in_arg_respects_in_defer() {
    let mut interner = SymbolInterner::new();
    let file_sym = interner.intern_scoped(None, "f");

    let mut before = ProductState::initial();
    before.resource.set(file_sym, ResourceLifecycle::OPEN);

    let ast = AstMeta {
        span: (0, 30),
        ..Default::default()
    };
    let taint = TaintMeta {
        uses: vec!["f".into()],
        ..Default::default()
    };
    let call = CallMeta {
        callee: Some("log.Print".into()),
        ..Default::default()
    };
    let node = NodeInfo {
        kind: StmtKind::Call,
        ast,
        taint,
        call,
        arg_callees: vec![Some("f.Close".into())],
        // The deferred flag is what should suppress the inner-call release.
        in_defer: true,
        ..Default::default()
    };

    let xfer = DefaultTransfer {
        lang: Lang::Go,
        resource_pairs: rules::resource_pairs(Lang::Go),
        interner: &interner,
        resource_method_summaries: &[],
        ptr_proxy_hints: None,
    };

    let (after, _) = xfer.apply(NodeIndex::new(0), &node, None, before);
    assert_eq!(after.resource.get(file_sym), ResourceLifecycle::OPEN);
}
|
||||
|
||||
/// In Go, an acquire assigned to a member-expression LHS
/// (`b.cpuprof = os.Create(...)`) must not create any tracked resource
/// state.
#[test]
fn member_field_lhs_acquire_skips_resource_state() {
    let interner = SymbolInterner::new();

    let ast = AstMeta {
        span: (0, 30),
        ..Default::default()
    };
    let taint = TaintMeta {
        // Dotted define: the acquire result lands in a struct field.
        defines: Some("b.cpuprof".into()),
        ..Default::default()
    };
    let call = CallMeta {
        callee: Some("os.Create".into()),
        ..Default::default()
    };
    let node = NodeInfo {
        kind: StmtKind::Call,
        ast,
        taint,
        call,
        ..Default::default()
    };

    let xfer = DefaultTransfer {
        lang: Lang::Go,
        resource_pairs: rules::resource_pairs(Lang::Go),
        interner: &interner,
        resource_method_summaries: &[],
        ptr_proxy_hints: None,
    };

    let (after, _) = xfer.apply(NodeIndex::new(0), &node, None, ProductState::initial());
    assert!(after.resource.vars.is_empty());
}
|
||||
|
||||
/// Counterpart to the field-LHS case: an acquire assigned to a bare
/// identifier (`f = os.Open(...)`) must still be tracked as OPEN.
#[test]
fn bare_ident_lhs_acquire_still_tracks() {
    let mut interner = SymbolInterner::new();
    let file_sym = interner.intern_scoped(None, "f");

    let ast = AstMeta {
        span: (0, 30),
        ..Default::default()
    };
    let taint = TaintMeta {
        // No dot in the define, so the field-LHS gate does not apply.
        defines: Some("f".into()),
        ..Default::default()
    };
    let call = CallMeta {
        callee: Some("os.Open".into()),
        ..Default::default()
    };
    let node = NodeInfo {
        kind: StmtKind::Call,
        ast,
        taint,
        call,
        ..Default::default()
    };

    let xfer = DefaultTransfer {
        lang: Lang::Go,
        resource_pairs: rules::resource_pairs(Lang::Go),
        interner: &interner,
        resource_method_summaries: &[],
        ptr_proxy_hints: None,
    };

    let (after, _) = xfer.apply(NodeIndex::new(0), &node, None, ProductState::initial());
    assert!(after.resource.get(file_sym).contains(ResourceLifecycle::OPEN));
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -325,6 +325,28 @@ pub struct SsaFuncSummary {
|
|||
/// can be joined by ordinal at call-graph build time.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub typed_call_receivers: Vec<(u32, String)>,
|
||||
/// Parameter indices whose taint flow to the return value is fully
|
||||
/// validated by a dominating predicate (regex allowlist, type check,
|
||||
/// validation call, etc.) on every return path inside the function.
|
||||
///
|
||||
/// At a call site, each tainted argument passed to a position in
|
||||
/// this list — and the call's own return value — are marked
|
||||
/// `validated_must` / `validated_may` in the caller's SSA taint
|
||||
/// state, the same way an inline `if (!regex.test(x)) throw` would
|
||||
/// validate the surviving branch. Sound because the call only
|
||||
/// returns normally on the validating arm; if validation failed,
|
||||
/// control would not reach the post-call instruction.
|
||||
///
|
||||
/// Populated by
|
||||
/// [`crate::taint::ssa_transfer::summary_extract::extract_ssa_func_summary`]
|
||||
/// when a per-parameter probe shows the parameter's `var_name` in
|
||||
/// `validated_must` at every return block of the helper. Empty
|
||||
/// (the default) for helpers that do not validate any parameter.
|
||||
/// Closes the validated-flow propagation gap that left
|
||||
/// CVE-2026-25544 (Payload `sanitizeValue` SQL injection) detecting
|
||||
/// on both vulnerable and patched code.
|
||||
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
|
||||
pub validated_params_to_return: SmallVec<[usize; 2]>,
|
||||
}
|
||||
|
||||
/// A per-return-path [`PathFact`] entry.
|
||||
|
|
|
|||
|
|
@ -441,6 +441,7 @@ fn ssa_summary_serde_round_trip_identity() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
@ -474,6 +475,7 @@ fn ssa_summary_serde_round_trip_strip_bits() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
@ -504,6 +506,7 @@ fn ssa_summary_serde_round_trip_add_bits() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
@ -541,6 +544,7 @@ fn ssa_summary_serde_round_trip_all_variants() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
@ -580,6 +584,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
gs.insert_ssa(key.clone(), v1.clone());
|
||||
|
|
@ -607,6 +612,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
gs.insert_ssa(key.clone(), v2.clone());
|
||||
|
|
@ -654,6 +660,7 @@ fn global_summaries_merge_with_ssa_entries() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let sum_b = SsaFuncSummary {
|
||||
|
|
@ -677,6 +684,7 @@ fn global_summaries_merge_with_ssa_entries() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
|
||||
|
|
@ -724,6 +732,7 @@ fn global_summaries_is_empty_considers_ssa() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -754,6 +763,7 @@ fn ssa_summary_serde_round_trip_param_to_sink_param() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
@ -799,6 +809,7 @@ fn ssa_summary_serde_round_trip_container_fields() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
@ -854,6 +865,7 @@ fn ssa_summary_serde_round_trip_return_abstract() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
@ -1375,6 +1387,7 @@ fn global_summaries_resolve_body_requires_body_present() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -3519,6 +3532,7 @@ fn cf4_return_path_transform_serde_round_trip() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
|
|
|
|||
|
|
@ -1593,6 +1593,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -1662,6 +1663,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -1731,6 +1733,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -1795,6 +1798,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -1859,6 +1863,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2057,6 +2062,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2136,6 +2142,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2216,6 +2223,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2246,6 +2254,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2276,6 +2285,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2355,6 +2365,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2436,6 +2447,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
@ -2465,6 +2477,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
validated_params_to_return: smallvec::SmallVec::new(),
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1406,6 +1406,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
|
|||
mod_aliases_ref,
|
||||
None,
|
||||
Some(&formal_params),
|
||||
None,
|
||||
);
|
||||
|
||||
// Only store if the summary has observable effects. With
|
||||
|
|
@ -1531,6 +1532,11 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
} else {
|
||||
Some(&mod_aliases)
|
||||
};
|
||||
let formal_destructured = if !body.meta.param_destructured_fields.is_empty() {
|
||||
Some(body.meta.param_destructured_fields.as_slice())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let summary = ssa_transfer::extract_ssa_func_summary(
|
||||
&func_ssa,
|
||||
&body.graph,
|
||||
|
|
@ -1543,6 +1549,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
mod_aliases_ref,
|
||||
locator,
|
||||
Some(formal_params),
|
||||
formal_destructured,
|
||||
);
|
||||
|
||||
// Always insert the summary, even when all fields are empty/default.
|
||||
|
|
@ -1775,6 +1782,11 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
Some(&mod_aliases)
|
||||
};
|
||||
|
||||
let formal_destructured = if !body.meta.param_destructured_fields.is_empty() {
|
||||
Some(body.meta.param_destructured_fields.as_slice())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let new_summary = ssa_transfer::extract_ssa_func_summary_full(
|
||||
&callee.ssa,
|
||||
parent_cfg,
|
||||
|
|
@ -1788,6 +1800,7 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
locator,
|
||||
Some(&body.meta.params),
|
||||
Some(&augmented_snapshot),
|
||||
formal_destructured,
|
||||
);
|
||||
|
||||
// OR-merge sink-only fields into the existing summary.
|
||||
|
|
@ -1796,8 +1809,16 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
}
|
||||
}
|
||||
|
||||
/// OR-merge `param_to_sink` and `param_to_sink_param` from `src` into
|
||||
/// `dst`. Existing entries are preserved; only NEW entries are added.
|
||||
/// OR-merge `param_to_sink`, `param_to_sink_param`, and
|
||||
/// `validated_params_to_return` from `src` into `dst`. Existing entries
|
||||
/// are preserved; only NEW entries are added.
|
||||
///
|
||||
/// The validated-param list grows monotonically across extraction
|
||||
/// rounds: a parameter that proves validated under any extraction
|
||||
/// pass (the augmented second pass typically resolves more
|
||||
/// cross-function summaries than the first) stays validated. Drops
|
||||
/// here would silently lose CVE-2026-25544-class precision the
|
||||
/// re-extraction pass was specifically designed to recover.
|
||||
fn merge_sink_fields(
|
||||
dst: &mut crate::summary::ssa_summary::SsaFuncSummary,
|
||||
src: &crate::summary::ssa_summary::SsaFuncSummary,
|
||||
|
|
@ -1823,6 +1844,11 @@ fn merge_sink_fields(
|
|||
dst.param_to_sink_param.push((idx, pos, caps));
|
||||
}
|
||||
}
|
||||
for &idx in &src.validated_params_to_return {
|
||||
if !dst.validated_params_to_return.contains(&idx) {
|
||||
dst.validated_params_to_return.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk lexical-containment children of every parent body and lift
|
||||
|
|
|
|||
|
|
@ -377,6 +377,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
|
||||
// Regex / pattern allowlist `<X>.test(value)` / `<X>.match(value)` calls
|
||||
// where the receiver name carries a regex or pattern marker. The
|
||||
// standard JS / TS / Python / Java / Ruby / Go regex APIs all expose a
|
||||
// boolean test method; the success arm (true) means `value` matches the
|
||||
// pattern. Conservative on receiver names so non-regex methods like
|
||||
// `obj.test(x)` (test runner), `db.test(...)` (test column) etc. don't
|
||||
// get pulled in. Motivated by Payload CVE-2026-25544
|
||||
// (`if (!SAFE_STRING_REGEX.test(value)) throw …;`).
|
||||
if (bare == "test" || bare == "match" || bare == "matches")
|
||||
&& let Some(dot_pos) = callee_part.rfind('.')
|
||||
{
|
||||
let receiver = &callee_part[..dot_pos];
|
||||
let receiver_lower = receiver.to_ascii_lowercase();
|
||||
if receiver_lower.contains("regex") || receiver_lower.contains("pattern") {
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
}
|
||||
|
||||
// Sanitizer
|
||||
if bare.contains("sanitiz") || bare.contains("escape") || bare.contains("encode") {
|
||||
return PredicateKind::SanitizerCall;
|
||||
|
|
@ -638,6 +656,19 @@ fn extract_validation_target(text: &str) -> Option<String> {
|
|||
// Check for method call pattern: `x.method(...)` or `x.method_name(...)`
|
||||
if let Some(dot_pos) = callee_part.rfind('.') {
|
||||
let receiver = callee_part[..dot_pos].trim();
|
||||
let method = callee_part[dot_pos + 1..].trim().to_ascii_lowercase();
|
||||
// Regex-allowlist `<re>.test(value)` / `<re>.match(value)` / `<re>.matches(value)`:
|
||||
// the validated target is the call's first argument, not the regex
|
||||
// receiver. Without this special case, branch narrowing would mark
|
||||
// the regex itself as validated and leave the user input alone.
|
||||
if matches!(method.as_str(), "test" | "match" | "matches")
|
||||
&& let Some(first_arg) = first_call_arg(args_part)
|
||||
{
|
||||
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
return Some(first_arg.to_string());
|
||||
}
|
||||
}
|
||||
if !receiver.is_empty() && is_identifier(receiver) {
|
||||
return Some(receiver.to_string());
|
||||
}
|
||||
|
|
@ -977,6 +1008,33 @@ mod tests {
|
|||
assert_eq!(target.as_deref(), Some("x"));
|
||||
}
|
||||
|
||||
/// A negated regex allowlist check, `!<REGEX>.test(value)`, must be
/// classified as a validation call whose validated target is the call's
/// first argument (`value`) rather than the regex receiver.
///
/// Pinned because method calls otherwise default to treating the
/// receiver as the validated target. Motivated by Payload CVE-2026-25544
/// (`if (!SAFE_STRING_REGEX.test(value)) throw …;`).
#[test]
fn target_regex_test_first_arg() {
    let classified = classify_condition_with_target("!SAFE_STRING_REGEX.test(value)");
    // Element 0 is the predicate kind, element 1 the optional target name.
    assert_eq!(classified.0, PredicateKind::ValidationCall);
    assert_eq!(classified.1.as_deref(), Some("value"));
}
|
||||
|
||||
/// A receiver whose name carries a `pattern` marker (`ALLOWED_PATTERN`)
/// is treated like a regex receiver: the call classifies as a validation
/// call and the validated target is the first argument (`s`).
#[test]
fn target_regex_test_pattern_receiver() {
    let classified = classify_condition_with_target("ALLOWED_PATTERN.test(s)");
    // Element 0 is the predicate kind, element 1 the optional target name.
    assert_eq!(classified.0, PredicateKind::ValidationCall);
    assert_eq!(classified.1.as_deref(), Some("s"));
}
|
||||
|
||||
/// A `.test(...)` call on a receiver whose name has no regex/pattern
/// marker must not be treated as a validator: `obj.test(x)` is a test
/// runner, not a regex, so the condition classifies as `Unknown`.
#[test]
fn target_test_non_regex_receiver_is_not_validation() {
    assert_eq!(
        classify_condition("obj.test(value)"),
        PredicateKind::Unknown
    );
}
|
||||
|
||||
#[test]
|
||||
fn target_comparison_extracts_identifier_side() {
|
||||
let (kind, target) = classify_condition_with_target("x == 5");
|
||||
|
|
|
|||
|
|
@ -3499,7 +3499,21 @@ pub(super) fn transfer_inst(
|
|||
// `ssa/lower.rs`), which inflates `args.len()` beyond the real
|
||||
// positional arity. The CFG's `arg_uses` is the authoritative
|
||||
// positional-arg list.
|
||||
let arity_hint = info.call.arg_uses.len();
|
||||
//
|
||||
// Fallback: certain TypeScript call shapes — notably calls
|
||||
// inside template-string substitutions (`${fn(arg)}`) — get
|
||||
// their `arg_uses` dropped by CFG lowering even though the
|
||||
// call's positional `args` are intact. When that happens
|
||||
// the strict `Some(0)` arity hint silently fails to match
|
||||
// any callee that takes ≥1 arg, swallowing summary
|
||||
// resolution. Detect the asymmetry and pass `None` so
|
||||
// `resolve_local_func_key_query`'s unique-name fallback
|
||||
// can still pick up the lone candidate.
|
||||
let arity_hint = if info.call.arg_uses.is_empty() && !args.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(info.call.arg_uses.len())
|
||||
};
|
||||
// Type-aware resolution: when the SSA receiver value has a
|
||||
// known abstract type (HttpClient, URL, …), feed that into
|
||||
// the resolver as an authoritative `receiver_type`. This
|
||||
|
|
@ -3511,7 +3525,7 @@ pub(super) fn transfer_inst(
|
|||
callee,
|
||||
caller_func,
|
||||
info.call.call_ordinal,
|
||||
Some(arity_hint),
|
||||
arity_hint,
|
||||
*receiver,
|
||||
);
|
||||
|
||||
|
|
@ -3627,6 +3641,43 @@ pub(super) fn transfer_inst(
|
|||
env.refine(inst.value, &fact);
|
||||
}
|
||||
}
|
||||
|
||||
// Validated-flow propagation through callee summaries.
|
||||
//
|
||||
// Runs regardless of whether inline analysis already
|
||||
// resolved the call: inline analysis re-runs the
|
||||
// callee's taint with caller-side seeds but does not
|
||||
// surface the callee's symbol-keyed
|
||||
// `validated_must` / `validated_may` state into the
|
||||
// caller, so the summary-level signal is the only
|
||||
// channel for propagating helper-validation across
|
||||
// a function boundary.
|
||||
//
|
||||
// When the callee's body validates a parameter on
|
||||
// every return path that carries the param's caps
|
||||
// (regex allowlist, type check, validation call, …),
|
||||
// a normal-returning call site is the validating arm
|
||||
// by construction: control could not reach the
|
||||
// post-call instruction unless the helper's
|
||||
// predicate(s) accepted the argument. Mark each
|
||||
// tainted argument's `var_name` and the call's
|
||||
// result `var_name` in the caller's
|
||||
// `validated_must` / `validated_may` sets so
|
||||
// subsequent sinks observe `all_validated = true`,
|
||||
// the same way an inline `if (!regex.test(x)) throw`
|
||||
// validates the surviving branch. Closes the
|
||||
// helper-validator propagation gap surfaced by
|
||||
// CVE-2026-25544 (Payload `sanitizeValue` SQLi).
|
||||
if !resolved.validated_params_to_return.is_empty() {
|
||||
propagate_validated_params_to_return(
|
||||
inst,
|
||||
args,
|
||||
ssa,
|
||||
transfer.interner,
|
||||
state,
|
||||
&resolved.validated_params_to_return,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// When find_classifiable_inner_call overrides the callee (e.g.
|
||||
|
|
@ -3640,7 +3691,7 @@ pub(super) fn transfer_inst(
|
|||
oc,
|
||||
caller_func,
|
||||
info.call.call_ordinal,
|
||||
Some(arity_hint),
|
||||
arity_hint,
|
||||
) {
|
||||
if resolved_container_to_return.is_empty() {
|
||||
resolved_container_to_return =
|
||||
|
|
@ -3735,6 +3786,24 @@ pub(super) fn transfer_inst(
|
|||
if !aggregate_sanitizer_applied {
|
||||
return_bits &= !resolved.sanitizer_caps;
|
||||
}
|
||||
|
||||
// Validated-flow propagation through callee summaries.
|
||||
//
|
||||
// When the callee's body validates a parameter on every
|
||||
// return path (regex allowlist, type check, validation
|
||||
// call, etc. — see
|
||||
// [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`]),
|
||||
// a normal-returning call site is the validating arm by
|
||||
// construction: control could not reach the post-call
|
||||
// instruction unless the helper's predicate(s) accepted
|
||||
// the argument. Mark each tainted argument's `var_name`
|
||||
// and the call's result `var_name` in the caller's
|
||||
// `validated_must` / `validated_may` sets so subsequent
|
||||
// sinks observe `all_validated = true`, the same way an
|
||||
// inline `if (!regex.test(x)) throw` validates the
|
||||
// surviving branch. Closes the helper-validator
|
||||
// propagation gap surfaced by CVE-2026-25544 (Payload
|
||||
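// `sanitizeValue` SQLi).
//
// NOTE(review): no statement follows this comment in this block. The
// marking itself appears to be performed by the
// `propagate_validated_params_to_return` call at the summary-resolution
// site earlier in `transfer_inst` — confirm, and consider trimming this
// duplicate description.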
|
||||
}
|
||||
|
||||
// Type-qualified receiver resolution: when normal callee resolution
|
||||
|
|
@ -4236,7 +4305,7 @@ pub(super) fn transfer_inst(
|
|||
oc,
|
||||
caller_func,
|
||||
info.call.call_ordinal,
|
||||
Some(arity_hint),
|
||||
arity_hint,
|
||||
) {
|
||||
if !oc_sum.propagates_taint && oc_sum.source_caps.is_empty() {
|
||||
// Outer callee blocks taint: no param→return flow,
|
||||
|
|
@ -6301,6 +6370,60 @@ fn collect_args_taint(
|
|||
/// [`Cap::UNAUTHORIZED_ID`], ownership/membership guards prove on
|
||||
/// inputs rather than the return value. Other caps and origins are
|
||||
/// untouched.
|
||||
/// Apply [`SsaFuncSummary::validated_params_to_return`] at a call site.
|
||||
///
|
||||
/// For each parameter index `p` in `validated_params`, mark the
|
||||
/// `var_name` of every tainted SSA value at `args[p]` and the call's
|
||||
/// own result `inst.value` in the caller's `validated_must` /
|
||||
/// `validated_may` sets. Mirrors the symbol-keyed validation a direct
|
||||
/// `if (!regex.test(x)) throw` would set on the surviving branch.
|
||||
///
|
||||
/// Sound because the callee summary records `validated_params_to_return`
|
||||
/// only when the param's `var_name` is in `validated_must` at *every*
|
||||
/// return block — a normal-returning call therefore proves the
|
||||
/// validating arm. No-op when no actual argument is tainted (avoids
|
||||
/// spuriously validating untouched names downstream).
|
||||
fn propagate_validated_params_to_return(
|
||||
inst: &SsaInst,
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
ssa: &SsaBody,
|
||||
interner: &crate::state::symbol::SymbolInterner,
|
||||
state: &mut SsaTaintState,
|
||||
validated_params: &[usize],
|
||||
) {
|
||||
let mark = |val: SsaValue, st: &mut SsaTaintState| {
|
||||
let Some(name) = ssa
|
||||
.value_defs
|
||||
.get(val.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
else {
|
||||
return;
|
||||
};
|
||||
let Some(sym) = interner.get(name) else {
|
||||
return;
|
||||
};
|
||||
st.validated_must.insert(sym);
|
||||
st.validated_may.insert(sym);
|
||||
};
|
||||
|
||||
let mut any_arg_tainted = false;
|
||||
for &p in validated_params {
|
||||
let Some(arg_vals) = args.get(p) else {
|
||||
continue;
|
||||
};
|
||||
for &v in arg_vals {
|
||||
if state.get(v).is_some_and(|t| !t.caps.is_empty()) {
|
||||
any_arg_tainted = true;
|
||||
mark(v, state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if any_arg_tainted {
|
||||
mark(inst.value, state);
|
||||
}
|
||||
}
|
||||
|
||||
fn strip_cap_from_call_args(
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
receiver: &Option<SsaValue>,
|
||||
|
|
@ -8676,6 +8799,14 @@ struct ResolvedSummary {
|
|||
/// `field_points_to` records. Applied at the caller call site by
|
||||
/// `apply_field_points_to_writes`.
|
||||
field_points_to: crate::summary::points_to::FieldPointsToSummary,
|
||||
/// Parameter indices whose taint flow to the return is fully
|
||||
/// validated by a dominating predicate inside the callee on every
|
||||
/// return path. Mirrors
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`].
|
||||
/// Populated only via `convert_ssa_to_resolved`; other resolution
|
||||
/// paths leave it empty (label / coarse-FuncSummary forms cannot
|
||||
/// express per-path predicate validation).
|
||||
validated_params_to_return: Vec<usize>,
|
||||
}
|
||||
|
||||
fn resolve_callee(
|
||||
|
|
@ -8825,6 +8956,7 @@ fn resolve_callee_full(
|
|||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
validated_params_to_return: vec![],
|
||||
});
|
||||
}
|
||||
// Try label classification for the bound function (by leaf name).
|
||||
|
|
@ -8896,6 +9028,7 @@ fn resolve_callee_full(
|
|||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
validated_params_to_return: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -9041,6 +9174,7 @@ fn resolve_callee_full(
|
|||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
validated_params_to_return: vec![],
|
||||
});
|
||||
}
|
||||
} else {
|
||||
|
|
@ -9091,6 +9225,7 @@ fn resolve_callee_full(
|
|||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
validated_params_to_return: vec![],
|
||||
};
|
||||
match widened.len() {
|
||||
0 => {}
|
||||
|
|
@ -9162,6 +9297,7 @@ fn resolve_callee_full(
|
|||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
validated_params_to_return: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -9344,6 +9480,7 @@ fn convert_ssa_to_resolved_for_caller(
|
|||
points_to: ssa_sum.points_to.clone(),
|
||||
field_points_to: ssa_sum.field_points_to.clone(),
|
||||
param_to_gate_filters: ssa_sum.param_to_gate_filters.clone(),
|
||||
validated_params_to_return: ssa_sum.validated_params_to_return.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ pub fn extract_ssa_func_summary(
|
|||
module_aliases: Option<&HashMap<SsaValue, SmallVec<[String; 2]>>>,
|
||||
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
||||
formal_param_names: Option<&[String]>,
|
||||
formal_destructured_fields: Option<&[Vec<String>]>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
extract_ssa_func_summary_full(
|
||||
ssa,
|
||||
|
|
@ -64,6 +65,7 @@ pub fn extract_ssa_func_summary(
|
|||
locator,
|
||||
formal_param_names,
|
||||
None,
|
||||
formal_destructured_fields,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -93,6 +95,15 @@ pub fn extract_ssa_func_summary_full(
|
|||
ssa_summaries: Option<
|
||||
&HashMap<crate::symbol::FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
|
||||
>,
|
||||
// Per-parameter destructured-binding sibling names. Entry `i` is
|
||||
// the list of field names destructured by the same call-site arg
|
||||
// slot as the primary `formal_param_names[i]`, excluding the
|
||||
// primary name. Empty vec for non-destructured params; `None` for
|
||||
// callers that don't carry destructure info (legacy / test paths).
|
||||
// Drives the destructured-arg expansion in the per-param probe so
|
||||
// taint flow through sibling bindings is visible to summary
|
||||
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
|
||||
formal_destructured_fields: Option<&[Vec<String>]>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
use crate::summary::SinkSite;
|
||||
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
|
||||
|
|
@ -159,13 +170,32 @@ pub fn extract_ssa_func_summary_full(
|
|||
/// Inner [`PathFact`] when the rv on this path is a one-arg
|
||||
/// variant constructor; [`None`] otherwise.
|
||||
variant_inner_fact: Option<crate::abstract_interp::PathFact>,
|
||||
/// `true` when the per-param probe's seeded parameter var_name
|
||||
/// is in this return block's exit `validated_must`. `false`
|
||||
/// for the baseline (no-seed) probe and for params not
|
||||
/// validated on this path. Drives
|
||||
/// `validated_params_to_return` summary extraction.
|
||||
param_validated_must: bool,
|
||||
}
|
||||
|
||||
// Helper: run a taint probe with a given global_seed and return
|
||||
// the aggregate return caps, sink events, joined return abstract,
|
||||
// and the per-return-block observation list used to derive
|
||||
// per-return-path transforms.
|
||||
let run_probe = |seed: HashMap<BindingKey, VarTaint>| -> (
|
||||
//
|
||||
// `probe_param_names` lists the seeded parameter's `var_name`
|
||||
// plus any destructured-binding siblings sharing the slot
|
||||
// (`None` for the baseline source-caps probe). When non-empty,
|
||||
// each return-block observation records whether ANY of those
|
||||
// names is in the exit state's `validated_must`, which feeds
|
||||
// `validated_params_to_return` summary extraction below. The
|
||||
// any-name semantics matches the slot-wide model: a destructured
|
||||
// formal `({ a, b, c })` represents one call-site slot, and any
|
||||
// sibling reaching `validated_must` proves the slot's caps were
|
||||
// narrowed before reaching the return.
|
||||
let run_probe = |seed: HashMap<BindingKey, VarTaint>,
|
||||
probe_param_names: Option<&[&str]>|
|
||||
-> (
|
||||
Cap,
|
||||
Vec<SsaTaintEvent>,
|
||||
Option<crate::abstract_interp::AbstractValue>,
|
||||
|
|
@ -313,6 +343,13 @@ pub fn extract_ssa_func_summary_full(
|
|||
// The hash is stable across runs for a given predicate
|
||||
// shape so call sites can compare paths deterministically.
|
||||
let (predicate_hash, known_true, known_false) = summarise_return_predicates(&exit);
|
||||
let param_validated_must = match probe_param_names {
|
||||
Some(names) => names.iter().any(|name| match interner.get(name) {
|
||||
Some(sym) => exit.validated_must.contains(sym),
|
||||
None => false,
|
||||
}),
|
||||
None => false,
|
||||
};
|
||||
per_return.push(ReturnBlockObs {
|
||||
derived_caps: block_derived_caps,
|
||||
param_caps: block_param_caps,
|
||||
|
|
@ -322,6 +359,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
abstract_value: block_abs,
|
||||
path_fact: block_path_fact,
|
||||
variant_inner_fact: block_variant_inner,
|
||||
param_validated_must,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -343,7 +381,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
// Abstract values don't depend on taint seeding, so the baseline probe
|
||||
// captures the function's intrinsic abstract return value.
|
||||
let (baseline_return_caps, _baseline_events, return_abstract, baseline_obs) =
|
||||
run_probe(HashMap::new());
|
||||
run_probe(HashMap::new(), None);
|
||||
let source_caps = baseline_return_caps;
|
||||
|
||||
// Per-return-path PathFact decomposition derived from the baseline
|
||||
|
|
@ -403,6 +441,12 @@ pub fn extract_ssa_func_summary_full(
|
|||
usize,
|
||||
SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]>,
|
||||
)> = Vec::new();
|
||||
// Parameter indices whose taint flow to the return is fully
|
||||
// validated by a dominating predicate on every return path.
|
||||
// Populated below by checking each per-param probe's return-block
|
||||
// exit states for `validated_must` containing the param's
|
||||
// var_name. Empty when no parameter is validated.
|
||||
let mut validated_params_to_return: SmallVec<[usize; 2]> = SmallVec::new();
|
||||
|
||||
for &(idx, ref var_name, _ssa_val) in ¶m_info {
|
||||
let mut seed = HashMap::new();
|
||||
|
|
@ -421,6 +465,37 @@ pub fn extract_ssa_func_summary_full(
|
|||
probe_taint.clone(),
|
||||
);
|
||||
|
||||
// Destructured-arg sibling expansion. When the formal at slot
|
||||
// `idx` destructures an object pattern (`({ column, operator,
|
||||
// value })`), the SSA body emits a separate [`SsaOp::Param`]
|
||||
// for every destructured binding (sequential indices > slot
|
||||
// count, since the closure-capture pass treats them as
|
||||
// free-identifier reads). The call-site only passes ONE arg
|
||||
// for the slot, so the engine never seeds the sibling Param
|
||||
// ops at runtime — but the per-parameter SUMMARY probe must
|
||||
// model "if this slot is tainted then every binding it
|
||||
// produced is tainted too". Seed each sibling's `var_name`
|
||||
// with the same caps the primary received. The probe-level
|
||||
// `validated_must` check below treats the slot as validated
|
||||
// when ANY sibling lands in `validated_must` on a return path.
|
||||
//
|
||||
// Closes the residual gap behind CVE-2026-25544 (PayloadCMS
|
||||
// `@payloadcms/drizzle` SQLi via `createJSONQuery({ value })`):
|
||||
// the validator helper `sanitizeValue(value, operator)` lives
|
||||
// inside the body and the probe needs to see `value` flow
|
||||
// through the `validated_params_to_return` channel before
|
||||
// suppressing the caller's sink.
|
||||
let slot_siblings: &[String] = formal_destructured_fields
|
||||
.and_then(|d| d.get(idx))
|
||||
.map(|v| v.as_slice())
|
||||
.unwrap_or(&[]);
|
||||
for sib in slot_siblings {
|
||||
seed.insert(
|
||||
BindingKey::new(sib.as_str(), BodyId(0)),
|
||||
probe_taint.clone(),
|
||||
);
|
||||
}
|
||||
|
||||
// Phantom-Param prefix seeding. SSA lowering of arrow / nested
|
||||
// function bodies often exposes free-identifier member-access
|
||||
// expressions (e.g. `file._source.uri`) as their own
|
||||
|
|
@ -437,13 +512,18 @@ pub fn extract_ssa_func_summary_full(
|
|||
// `formal_var_name + "."` with the same caps the formal param
|
||||
// received: semantically "if `file` is tainted, then every
|
||||
// observable field path on `file` is tainted too". Bounded
|
||||
// by SSA size; cap-equivalent to direct seeding.
|
||||
let prefix = format!("{}.", var_name);
|
||||
// by SSA size; cap-equivalent to direct seeding. Mirror this
|
||||
// for each destructured sibling (`value.foo` / `column.name`
|
||||
// member-projections inside the body).
|
||||
let prefixes: Vec<String> = std::iter::once(var_name.clone())
|
||||
.chain(slot_siblings.iter().cloned())
|
||||
.map(|n| format!("{}.", n))
|
||||
.collect();
|
||||
for block in &ssa.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
if let SsaOp::Param { .. } = &inst.op {
|
||||
if let Some(name) = inst.var_name.as_ref() {
|
||||
if name.starts_with(&prefix) {
|
||||
if prefixes.iter().any(|p| name.starts_with(p)) {
|
||||
seed.insert(
|
||||
BindingKey::new(name.as_str(), BodyId(0)),
|
||||
probe_taint.clone(),
|
||||
|
|
@ -454,7 +534,15 @@ pub fn extract_ssa_func_summary_full(
|
|||
}
|
||||
}
|
||||
|
||||
let (return_caps, events, _, per_return_obs) = run_probe(seed);
|
||||
// Build slot-wide name list for the validated_must check.
|
||||
// Primary first, then siblings. The vector only holds borrowed `&str`
|
||||
// slices (no owned copies) — `run_probe` just reads them per return block.
|
||||
let mut slot_names: Vec<&str> = Vec::with_capacity(1 + slot_siblings.len());
|
||||
slot_names.push(var_name.as_str());
|
||||
for sib in slot_siblings {
|
||||
slot_names.push(sib.as_str());
|
||||
}
|
||||
let (return_caps, events, _, per_return_obs) = run_probe(seed, Some(slot_names.as_slice()));
|
||||
|
||||
// Subtract baseline source_caps, we only want param-contributed caps
|
||||
let param_return_caps = return_caps & !source_caps;
|
||||
|
|
@ -469,6 +557,44 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_to_return.push((idx, transform));
|
||||
}
|
||||
|
||||
// Validated-param-to-return detection.
|
||||
//
|
||||
// When the per-param probe shows that the parameter's
|
||||
// `var_name` is in `validated_must` on every return path that
|
||||
// *carries the parameter's contributed caps*, record the
|
||||
// parameter as validated. The caller will mark each tainted
|
||||
// argument passed to this position — and the call's own
|
||||
// return value — as `validated_must` / `validated_may`, the
|
||||
// same way an inline `if (!regex.test(x)) throw` would
|
||||
// validate the surviving branch.
|
||||
//
|
||||
// Conservative gating:
|
||||
// * Skip when the param contributes no caps to the return,
|
||||
// a degenerate "validated but irrelevant" record.
|
||||
// * Skip when no return block was observed (probes that
|
||||
// diverged or hit `MAX_PROBE_PARAMS`).
|
||||
// * Require validation on every return path that *carries
|
||||
// param caps to the return*. Branches that return
|
||||
// constants (e.g. `if (x === null) return 'NULL'`) carry
|
||||
// no param taint and don't need a validation predicate.
|
||||
// * Require ≥1 path that actually validates the param.
|
||||
if !param_return_caps.is_empty() && !per_return_obs.is_empty() {
|
||||
let mut any_carrying_path = false;
|
||||
let all_carrying_validated = per_return_obs.iter().all(|obs| {
|
||||
let carries = !(obs.derived_caps & !source_caps).is_empty()
|
||||
|| !(obs.param_caps & !source_caps).is_empty();
|
||||
if carries {
|
||||
any_carrying_path = true;
|
||||
obs.param_validated_must
|
||||
} else {
|
||||
true
|
||||
}
|
||||
});
|
||||
if any_carrying_path && all_carrying_validated {
|
||||
validated_params_to_return.push(idx);
|
||||
}
|
||||
}
|
||||
|
||||
// Derive per-return-path decomposition. For each
|
||||
// observed return block, derive a `ReturnPathTransform` mirroring
|
||||
// the aggregate logic (prefer derived caps, fall back to param
|
||||
|
|
@ -694,6 +820,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
// extractor itself doesn't carry receiver-type info, the
|
||||
// caller patches it in.
|
||||
typed_call_receivers: Vec::new(),
|
||||
validated_params_to_return,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1641,6 +1641,7 @@ mod fanout_merge_tests {
|
|||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
validated_params_to_return: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4331,6 +4331,7 @@ fn ssa_summary_identity_propagation() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.param_to_return.is_empty(),
|
||||
|
|
@ -4394,6 +4395,7 @@ fn ssa_summary_sanitizer_strips_bits() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
// Sanitizer should strip some bits
|
||||
for (_, transform) in &summary.param_to_return {
|
||||
|
|
@ -4450,6 +4452,7 @@ fn ssa_summary_source_adds_bits() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.source_caps.is_empty(),
|
||||
|
|
@ -4506,6 +4509,7 @@ fn ssa_summary_param_to_sink() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.param_to_sink.is_empty(),
|
||||
|
|
@ -6122,6 +6126,61 @@ async function handler(req) {
|
|||
);
|
||||
}
|
||||
|
||||
/// A regex allowlist guard of the form `<X>.test(value)` must be treated as
/// a ValidationCall whose target is the call's first argument, not the
/// regex receiver.
///
/// Fixture shape:
///
/// ```js
/// const v = req.body.x;
/// if (!SAFE_REGEX.test(v)) { throw }
/// db.execute(v);   // direct flow: should be silent
/// ```
///
/// Per the original note on this test: `classify_condition` yields
/// ValidationCall for the `*regex*.test()` receiver shape (see
/// `target_regex_test_first_arg` in path_state), and
/// `extract_validation_target` replaces the default receiver-as-target
/// rule with the call's first argument. Combined with the existing
/// CFG-level negation handling in `compute_succ_states`, the false
/// (continue) branch marks `v` as validated.
///
/// Motivated by Payload CVE-2026-25544
/// (`if (!SAFE_STRING_REGEX.test(value)) throw`). Only the direct-flow
/// case is pinned here; transitive validation through SSA-derived values
/// (e.g. template-literal concat of `v` into `sql`) is a deeper gap that
/// is tracked separately and not closed by this test.
#[test]
fn regex_test_allowlist_narrowing_clears_direct_flow() {
    let fixture = br#"
const SAFE_REGEX = /^[\w]+$/;

async function handler(req) {
const userValue = req.body.filter;
if (!SAFE_REGEX.test(userValue)) {
throw new Error('bad');
}
return await db.execute(userValue);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    // The file's own summaries double as the summary set for the analysis.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "regex.test allowlist narrowing should suppress direct-flow finding; got {} finding(s): {findings:?}",
        findings.len()
    );
}
|
||||
|
||||
/// Regression: `extract_ssa_func_summary` must skip `all_validated`
|
||||
/// events when populating `param_to_sink` / `param_to_sink_param`.
|
||||
///
|
||||
|
|
@ -6205,6 +6264,282 @@ async function handler(req) {
|
|||
);
|
||||
}
|
||||
|
||||
/// Regression for the CVE-2026-25544 deep fix (the
/// `validated_params_to_return` summary field).
///
/// A helper that checks its parameter against a regex `.test(...)`
/// allowlist and returns a string derived from that parameter must
/// silence the caller's downstream sink, even though:
///   * the caller stores the call result in a fresh variable
///     (`const sql = sanitize(userValue)`), and
///   * the helper returns a *derived* template literal rather than the
///     parameter itself.
///
/// This is sound because the helper only returns normally on the
/// validating arm: control cannot reach the instruction after the call
/// unless the regex accepted the argument. Per the original note, the
/// behaviour is pinned by `propagate_validated_params_to_return` marking
/// both the argument and the call result `validated_must` /
/// `validated_may`, so the sink's `all_validated` check fires.
#[test]
fn validated_params_to_return_suppresses_one_hop_helper_validator() {
    let fixture = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return `safe:${value}`;
};

async function handler(req) {
const userValue = req.body.filter;
const sql = sanitize(userValue);
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "regex.test allowlist inside helper must suppress caller sink; got {} finding(s)",
        reported.len()
    );
}
|
||||
|
||||
/// Two-hop counterpart of
/// `validated_params_to_return_suppresses_one_hop_helper_validator`.
///
/// The validating helper is wrapped by a second helper that derives its
/// return value from the validator's result (`s + '!'` in the fixture).
/// Summary extraction must still report `validated_params_to_return` on
/// the *outer* helper. Per the original note, this pins the second-pass
/// re-extraction (via `re_extract_summaries_with_augment_view`) together
/// with the OR-merge of `validated_params_to_return` in
/// `merge_sink_fields`.
#[test]
fn validated_params_to_return_suppresses_two_hop_helper_validator() {
    let fixture = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};

const buildQuery = (value) => {
const s = sanitize(value);
return s + '!';
};

async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery(userValue);
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "two-hop helper-validator must propagate validated_params_to_return through both helpers; got {} finding(s)",
        reported.len()
    );
}
|
||||
|
||||
/// Negative companion to
/// `validated_params_to_return_suppresses_one_hop_helper_validator`.
///
/// The fixture has the same shape but the helper contains NO regex.test
/// guard, so the caller's sink must still be reported. This guards
/// against the validated-flow propagation over-suppressing when the
/// helper does not actually validate anything.
#[test]
fn validated_params_to_return_does_not_suppress_unvalidated_helper() {
    let fixture = br#"
const sanitize = (value) => {
return `safe:${value}`;
};

async function handler(req) {
const userValue = req.body.filter;
const sql = sanitize(userValue);
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "helper without regex guard must still flag the caller sink",
    );
}
|
||||
|
||||
/// Regression: the per-parameter summary probe has to seed every binding
/// destructured from an object-pattern formal that shares a slot, not
/// only the primary name chosen by `extract_param_meta`.
///
/// In the fixture the outer helper takes its single argument as
/// `({ value }) => …` and forwards the destructured `value` binding to a
/// regex-validating helper. Per the original note, a probe that seeds
/// only one name of the pattern cannot prove
/// `validated_params_to_return = [0]` for such a helper, because the
/// validation is recorded against the destructured binding. Closes the
/// residual blocker for CVE-2026-25544 (PayloadCMS Drizzle SQLi).
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper() {
    let fixture = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};

const buildQuery = ({ value }) => {
const s = sanitize(value);
return s + '!';
};

async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery({ value: userValue });
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "destructured object-pattern arg with regex.test allowlist inside the helper must suppress caller sink; got {} finding(s)",
        reported.len()
    );
}
|
||||
|
||||
/// Regression: TypeScript flavour of the destructured object-pattern
/// coverage (`required_parameter > pattern: object_pattern`).
///
/// Per the original note, TS wraps the destructure in a
/// `required_parameter` node whereas JS exposes it as a direct child of
/// `formal_parameters`; both shapes must hand the destructured siblings
/// to the per-parameter probe.
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper_ts() {
    let fixture = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value: string): string => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};

const buildQuery = ({ value }: { value: string }): string => {
const s = sanitize(value);
return s + '!';
};

async function handler(req: any) {
const userValue = req.body.filter;
const sql = buildQuery({ value: userValue });
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let parsed = parse_lang(fixture, "typescript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "TS destructured object-pattern arg with regex.test allowlist must suppress caller sink; got {} finding(s)",
        reported.len()
    );
}
|
||||
|
||||
/// Regression: an object-pattern formal with several fields must still
/// propagate `validated_params_to_return` when the validated binding is a
/// sibling and NOT the primary name returned by `extract_param_meta`.
///
/// Per the original note, in CVE-2026-25544 the primary is `column` (the
/// first identifier in `{ column, operator, pathSegments, value }`) while
/// the validator gates `value`; unless siblings are seeded, the probe
/// never observes the validation. The fixture reproduces that with
/// `{ column, operator, value }`.
#[test]
fn destructured_sibling_validation_propagates_through_summary() {
    let fixture = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};

const buildQuery = ({ column, operator, value }) => {
return `${column} ${operator} ${sanitize(value)}`;
};

async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery({ column: 'col', operator: '=', value: userValue });
db.execute(sql);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "destructured-sibling validation (validator binds non-primary slot binding) must propagate through summary; got {} finding(s)",
        reported.len()
    );
}
|
||||
|
||||
/// Regression: `validate*`-named callees match
|
||||
/// `InputValidatorPolarity::ErrorReturning`, bare `if (err) throw`
|
||||
/// guards the success branch (false branch). `is_valid*`/`is_safe*`
|
||||
|
|
@ -6290,3 +6625,153 @@ const handler = (req) => {
|
|||
None,
|
||||
);
|
||||
}
|
||||
|
||||
/// Default-valued JS arrow-function parameters (`(a = {}, b = {}) => …`)
/// must all be extracted.
///
/// Per the original note, tree-sitter wraps such parameters in
/// `assignment_pattern` nodes whose `left` field holds the identifier.
/// When `assignment_pattern` was missing from
/// `PARAM_CONFIG.param_node_kinds` the param walker skipped them, so any
/// function with defaulted params got a parameter-less summary, which
/// broke cross-function `param_to_sink` propagation for shapes like
/// Strapi's `sendTemplatedEmail`. Motivated by CVE-2023-22621.
#[test]
fn cve_2023_22621_js_default_params_extracted() {
    use crate::cfg::extract_param_meta_for_test;

    /// Pre-order search for the first `arrow_function` node under `node`.
    fn first_arrow<'a>(node: tree_sitter::Node<'a>) -> Option<tree_sitter::Node<'a>> {
        if node.kind() == "arrow_function" {
            return Some(node);
        }
        let mut cursor = node.walk();
        for child in node.named_children(&mut cursor) {
            if let Some(hit) = first_arrow(child) {
                return Some(hit);
            }
        }
        None
    }

    let src = br#"
const sendTemplatedEmail = (emailOptions = {}, emailTemplate = {}, data = {}) => {
return emailTemplate;
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&grammar).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();

    let arrow = first_arrow(tree.root_node()).expect("arrow function not found");
    let params = extract_param_meta_for_test(arrow, "javascript", src);

    // Only the parameter names matter here; the second tuple slot is ignored.
    let names: Vec<String> = params.iter().map(|(name, _)| name.clone()).collect();
    let expected: Vec<String> = ["emailOptions", "emailTemplate", "data"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    assert_eq!(
        names, expected,
        "expected all 3 default-valued arrow params extracted; got {:?}",
        names
    );
}
|
||||
|
||||
/// `_.template(tainted)` must be reported as a server-side template
/// injection sink.
///
/// Per the original note, lodash compiles `<% ... %>` evaluate blocks
/// into a JS Function, so attacker-controlled template text becomes RCE
/// at render time. The gate activates conservatively when arg 1 is
/// absent, because lodash's default behaviour is the dangerous one.
/// Motivated by CVE-2023-22621 (Strapi).
#[test]
fn cve_2023_22621_lodash_template_fires_on_tainted_input() {
    let fixture = br#"
const _ = require('lodash');
const handler = (req, res) => {
_.template(req.body.tpl);
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "expected taint flow on _.template(req.body.tpl); got 0 findings",
    );
}
|
||||
|
||||
/// `_.template(tainted, { evaluate: false })` must NOT be reported.
///
/// Per the original note, `evaluate: false` turns off lodash's
/// `<% ... %>` evaluate-block compilation, so the call stops being a
/// code-execution sink. The gate's `keyword_name = "evaluate"` activation
/// obtains the literal through the JS-side closure that walks the call's
/// arg-1 object literal (JS has no language-level keyword arguments).
/// Motivated by Strapi's CVE-2023-22621 patch.
#[test]
fn cve_2023_22621_lodash_template_suppressed_by_evaluate_false() {
    let fixture = br#"
const _ = require('lodash');
const handler = (req, res) => {
_.template(req.body.tpl, { evaluate: false });
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        reported.is_empty(),
        "expected no taint flow when evaluate:false is set; got {} findings",
        reported.len(),
    );
}
|
||||
|
||||
/// The immediately-invoked form `_.template(tainted)(data)` must still be
/// reported.
///
/// Per the original note, the outer call's `function` field is itself a
/// call_expression here, not the member-chain shape
/// `find_chained_inner_call` was first written for. The extension
/// recognises the `f()()` pattern and rebinds gate classification to the
/// inner call, so the gated `_.template` fires even when the compiled
/// function is invoked on the spot. Motivated by CVE-2023-22621.
#[test]
fn cve_2023_22621_lodash_template_double_call_inner_rebinding() {
    let fixture = br#"
const _ = require('lodash');
const handler = (req, res) => {
const tpl = req.body.tpl;
_.template(tpl)({});
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(fixture, "javascript", grammar);
    let reported = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !reported.is_empty(),
        "expected taint flow via double-call chain rebinding; got 0 findings",
    );
}
|
||||
|
|
|
|||
|
|
@ -483,6 +483,15 @@ pub struct AuthAnalysisConfig {
|
|||
pub admin_path_patterns: Vec<String>,
|
||||
pub admin_guard_names: Vec<String>,
|
||||
pub login_guard_names: Vec<String>,
|
||||
/// Typed-extractor wrapper names that prove the request passed
|
||||
/// route-level capability/policy enforcement (e.g. meilisearch's
|
||||
/// `GuardedData<ActionPolicy<X>, _>`). Per-language defaults set
|
||||
/// in `auth_analysis::config::build_auth_rules`; user nyx.toml
|
||||
/// entries are appended. Distinct from `login_guard_names` so the
|
||||
/// pattern (matched as last-segment + case-insensitive
|
||||
/// `starts_with`) doesn't pollute regular call recognition.
|
||||
#[serde(default)]
|
||||
pub policy_guard_names: Vec<String>,
|
||||
pub authorization_check_names: Vec<String>,
|
||||
pub mutation_indicator_names: Vec<String>,
|
||||
pub read_indicator_names: Vec<String>,
|
||||
|
|
@ -544,6 +553,7 @@ impl Default for AuthAnalysisConfig {
|
|||
admin_path_patterns: Vec::new(),
|
||||
admin_guard_names: Vec::new(),
|
||||
login_guard_names: Vec::new(),
|
||||
policy_guard_names: Vec::new(),
|
||||
authorization_check_names: Vec::new(),
|
||||
mutation_indicator_names: Vec::new(),
|
||||
read_indicator_names: Vec::new(),
|
||||
|
|
@ -1075,6 +1085,10 @@ pub(crate) fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
&mut entry.auth.login_guard_names,
|
||||
user_lang_cfg.auth.login_guard_names,
|
||||
);
|
||||
extend_dedup(
|
||||
&mut entry.auth.policy_guard_names,
|
||||
user_lang_cfg.auth.policy_guard_names,
|
||||
);
|
||||
extend_dedup(
|
||||
&mut entry.auth.authorization_check_names,
|
||||
user_lang_cfg.auth.authorization_check_names,
|
||||
|
|
|
|||
|
|
@ -57,12 +57,67 @@ pub enum DetectedFramework {
|
|||
#[derive(Debug, Clone, Default)]
|
||||
pub struct FrameworkContext {
|
||||
pub frameworks: Vec<DetectedFramework>,
|
||||
/// Language ecosystems whose root manifest existed and was inspected.
|
||||
/// Lets `lang_has_web_framework` distinguish "no manifest at all" from
|
||||
/// "manifest present but listed no matching framework" — the second
|
||||
/// case is a positive signal that the project has no HTTP boundary in
|
||||
/// that language, the first is just absence-of-information.
|
||||
pub inspected_langs: std::collections::HashSet<&'static str>,
|
||||
}
|
||||
|
||||
impl FrameworkContext {
|
||||
pub fn has(&self, fw: DetectedFramework) -> bool {
|
||||
self.frameworks.contains(&fw)
|
||||
}
|
||||
|
||||
/// Three-valued web-framework presence query for a language slug.
|
||||
///
|
||||
/// * `Some(true)` ─ at least one framework for `lang` is in `frameworks`.
|
||||
/// * `Some(false)` ─ a manifest for `lang` was inspected but listed no
|
||||
/// matching framework. The project genuinely has no HTTP boundary
|
||||
/// in this language.
|
||||
/// * `None` ─ no manifest for `lang` was inspected (e.g. single-file
|
||||
/// scans without a project root). Caller should fall back to
|
||||
/// prior-behavior heuristics.
|
||||
pub fn lang_has_web_framework(&self, lang: &str) -> Option<bool> {
|
||||
let (frameworks_for_lang, manifest_lang_key): (&[DetectedFramework], &str) = match lang {
|
||||
"javascript" | "typescript" | "js" | "ts" => (
|
||||
&[
|
||||
DetectedFramework::Express,
|
||||
DetectedFramework::Koa,
|
||||
DetectedFramework::Fastify,
|
||||
],
|
||||
"node",
|
||||
),
|
||||
"python" | "py" => (
|
||||
&[DetectedFramework::Flask, DetectedFramework::Django],
|
||||
"python",
|
||||
),
|
||||
"java" => (&[DetectedFramework::Spring], "java"),
|
||||
"go" => (&[DetectedFramework::Gin, DetectedFramework::Echo], "go"),
|
||||
"ruby" | "rb" => (
|
||||
&[DetectedFramework::Rails, DetectedFramework::Sinatra],
|
||||
"ruby",
|
||||
),
|
||||
"php" => (&[DetectedFramework::Laravel], "php"),
|
||||
"rust" | "rs" => (
|
||||
&[
|
||||
DetectedFramework::Axum,
|
||||
DetectedFramework::ActixWeb,
|
||||
DetectedFramework::Rocket,
|
||||
],
|
||||
"rust",
|
||||
),
|
||||
_ => return None,
|
||||
};
|
||||
if frameworks_for_lang.iter().any(|fw| self.has(*fw)) {
|
||||
return Some(true);
|
||||
}
|
||||
if self.inspected_langs.contains(manifest_lang_key) {
|
||||
return Some(false);
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Maximum bytes to read from each manifest file.
|
||||
|
|
@ -135,17 +190,50 @@ pub fn detect_in_file_frameworks(bytes: &[u8], lang_slug: &str) -> Vec<DetectedF
|
|||
fws.push(DetectedFramework::Rails);
|
||||
}
|
||||
}
|
||||
// Rust is intentionally not handled here — adding axum / actix_web
|
||||
// / rocket detection here would also flip framework-conditional
|
||||
// *label* rules on for files in workspaces whose root Cargo.toml
|
||||
// doesn't list the crate (e.g. meilisearch's root, which carries
|
||||
// actix-web only in subcrates), and the existing actix label set
|
||||
// marks `HttpResponse.json` as a `Cap::HTML_ESCAPE` sink ─ a
|
||||
// pattern that fires on every actix route that echoes a path
|
||||
// parameter back to the client (legitimate behavior, not XSS).
|
||||
//
|
||||
// The auth-analysis path uses `auth_analysis::extract`'s own
|
||||
// per-file Rust check (see `compute_web_framework_signal`) so the
|
||||
// signal is available without touching the label augmentation.
|
||||
_ => {}
|
||||
}
|
||||
fws
|
||||
}
|
||||
|
||||
/// Coarse per-file signal: does the file's leading byte range mention at
/// least one Rust web-framework symbol path (`axum::`, `axum_extra::`,
/// `actix_web::`, `rocket::`)? Used by [`crate::auth_analysis::extract`]
/// to gate the `is_external_input_param_name` arm of
/// `unit_has_user_input_evidence` without affecting framework-conditional
/// *label* rules.
///
/// Only the first 8 KiB of `bytes` are examined. The search is a plain
/// byte-substring match, so:
///
/// * a multi-byte UTF-8 character straddling the 8 KiB cut-off (or any
///   stray non-UTF-8 byte in the head) does not hide a framework path
///   that appears elsewhere in the head;
/// * the match is deliberately coarse: any occurrence of one of the
///   markers counts, including inside comments or as the tail of a
///   longer identifier.
///
/// The language is not checked here; callers are expected to pass Rust
/// source only. Returns `false` for empty input.
pub fn rust_file_imports_web_framework(bytes: &[u8]) -> bool {
    // Upper bound on how much of the file is scanned.
    const HEAD_LIMIT: usize = 8 * 1024;
    // Path prefixes that identify a supported web framework. All are
    // non-empty ASCII, which `windows` below relies on (it panics on 0).
    const MARKERS: [&[u8]; 4] = [b"axum::", b"axum_extra::", b"actix_web::", b"rocket::"];

    let head = &bytes[..bytes.len().min(HEAD_LIMIT)];
    MARKERS
        .iter()
        .any(|marker| head.windows(marker.len()).any(|window| window == *marker))
}
|
||||
|
||||
/// Detect frameworks from manifest files in the project root.
|
||||
pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
||||
let mut fws = Vec::new();
|
||||
let mut inspected: std::collections::HashSet<&'static str> = std::collections::HashSet::new();
|
||||
|
||||
// ── Node.js (package.json) ──
|
||||
if let Some(content) = read_bounded(&root.join("package.json")) {
|
||||
inspected.insert("node");
|
||||
// Crude substring search in the "dependencies" block area.
|
||||
// Good enough for detection, no JSON parsing overhead.
|
||||
if content.contains("\"express\"") {
|
||||
|
|
@ -169,6 +257,7 @@ pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
|||
// ── Python ──
|
||||
for name in &["requirements.txt", "Pipfile", "pyproject.toml"] {
|
||||
if let Some(content) = read_bounded(&root.join(name)) {
|
||||
inspected.insert("python");
|
||||
let lower = content.to_ascii_lowercase();
|
||||
if lower.contains("flask") && !fws.contains(&DetectedFramework::Flask) {
|
||||
fws.push(DetectedFramework::Flask);
|
||||
|
|
@ -182,6 +271,7 @@ pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
|||
// ── Java (Maven / Gradle) ──
|
||||
for name in &["pom.xml", "build.gradle", "build.gradle.kts"] {
|
||||
if let Some(content) = read_bounded(&root.join(name)) {
|
||||
inspected.insert("java");
|
||||
if (content.contains("spring-boot") || content.contains("spring-web"))
|
||||
&& !fws.contains(&DetectedFramework::Spring)
|
||||
{
|
||||
|
|
@ -192,6 +282,7 @@ pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
|||
|
||||
// ── Go (go.mod) ──
|
||||
if let Some(content) = read_bounded(&root.join("go.mod")) {
|
||||
inspected.insert("go");
|
||||
if content.contains("gin-gonic/gin") {
|
||||
fws.push(DetectedFramework::Gin);
|
||||
}
|
||||
|
|
@ -202,6 +293,7 @@ pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
|||
|
||||
// ── PHP (composer.json) ──
|
||||
if let Some(content) = read_bounded(&root.join("composer.json")) {
|
||||
inspected.insert("php");
|
||||
if content.contains("laravel/framework") {
|
||||
fws.push(DetectedFramework::Laravel);
|
||||
}
|
||||
|
|
@ -209,6 +301,7 @@ pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
|||
|
||||
// ── Ruby (Gemfile) ──
|
||||
if let Some(content) = read_bounded(&root.join("Gemfile")) {
|
||||
inspected.insert("ruby");
|
||||
if content.contains("'rails'") || content.contains("\"rails\"") {
|
||||
fws.push(DetectedFramework::Rails);
|
||||
}
|
||||
|
|
@ -219,6 +312,7 @@ pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
|||
|
||||
// ── Rust (Cargo.toml) ──
|
||||
if let Some(content) = read_bounded(&root.join("Cargo.toml")) {
|
||||
inspected.insert("rust");
|
||||
if content.contains("actix-web") {
|
||||
fws.push(DetectedFramework::ActixWeb);
|
||||
}
|
||||
|
|
@ -230,7 +324,10 @@ pub fn detect_frameworks(root: &Path) -> FrameworkContext {
|
|||
}
|
||||
}
|
||||
|
||||
FrameworkContext { frameworks: fws }
|
||||
FrameworkContext {
|
||||
frameworks: fws,
|
||||
inspected_langs: inspected,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -477,6 +574,57 @@ fn framework_context_has_is_false_for_absent_framework() {
|
|||
assert!(!ctx.has(DetectedFramework::Spring));
|
||||
}
|
||||
|
||||
/// Exercises the `Some(false)` and `Some(true)` answers of
/// `lang_has_web_framework` for Rust, and `None` for a language whose
/// manifest is absent.
#[test]
fn lang_has_web_framework_three_valued_for_rust() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    let manifest = root.join("Cargo.toml");

    // Manifest exists but names no axum / actix-web / rocket → Some(false).
    fs::write(&manifest, "[dependencies]\nserde = \"1\"\n").unwrap();
    let without_framework = detect_frameworks(root);
    assert_eq!(
        without_framework.lang_has_web_framework("rust"),
        Some(false)
    );
    // No Python manifest was written, so nothing is known about Python.
    assert_eq!(without_framework.lang_has_web_framework("python"), None);

    // Manifest rewritten to name axum → Some(true).
    fs::write(&manifest, "[dependencies]\naxum = \"0.7\"\n").unwrap();
    let with_framework = detect_frameworks(root);
    assert_eq!(with_framework.lang_has_web_framework("rust"), Some(true));
}
|
||||
|
||||
/// With an empty project root no manifest is inspected, so every language
/// query must answer `None`.
#[test]
fn lang_has_web_framework_none_when_manifest_absent() {
    let empty_root = tempfile::tempdir().unwrap();
    let detected = detect_frameworks(empty_root.path());
    // No Cargo.toml, Python manifest or Gemfile exists at the root.
    assert_eq!(detected.lang_has_web_framework("rust"), None);
    assert_eq!(detected.lang_has_web_framework("python"), None);
    assert_eq!(detected.lang_has_web_framework("ruby"), None);
}
|
||||
|
||||
/// `rust_file_imports_web_framework` must accept each supported
/// `<crate>::` path prefix and reject sources that lack one.
#[test]
fn rust_file_imports_web_framework_recognises_axum_actix_rocket() {
    let with_framework: [&[u8]; 4] = [
        b"use axum::Router;\nfn main() {}\n",
        b"use actix_web::web;\nfn main() {}\n",
        b"use rocket::get;\nfn main() {}\n",
        b"use axum_extra::routing::RouterExt;\n",
    ];
    for src in with_framework.iter() {
        assert!(rust_file_imports_web_framework(src));
    }

    let without_framework: [&[u8]; 2] = [
        // Imports that are not a web framework.
        b"use std::path::Path;\nuse serde::Deserialize;\nfn main() {}\n",
        // A bare crate name in a comment lacks the `<crate>::` path
        // prefix; the substring check is conservative on purpose.
        b"// migrating away from axum\nfn main() {}\n",
    ];
    for src in without_framework.iter() {
        assert!(!rust_file_imports_web_framework(src));
    }
}
|
||||
|
||||
#[test]
|
||||
fn detect_in_file_frameworks_go_echo() {
|
||||
let src = b"package main\nimport (\n\t\"net/http\"\n\t\"github.com/labstack/echo/v4\"\n)\nfunc x() {}\n";
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue